about | summary | refs | log | tree | commit | diff
path: root/llvm/lib/CodeGen
diff options
context:
space:
mode:
author NAKAMURA Takumi <geek4civic@gmail.com> 2025-01-09 18:31:57 +0900
committer NAKAMURA Takumi <geek4civic@gmail.com> 2025-01-09 18:33:27 +0900
commit df025ebf872052c0761d44a3ef9b65e9675af8a8 (patch)
tree 9b4e94583e2536546d6606270bcdf846c95e1ba2 /llvm/lib/CodeGen
parent 4428c9d0b1344179f85a72e183a44796976521e3 (diff)
parent bdcf47e4bcb92889665825654bb80a8bbe30379e (diff)
download llvm-users/chapuni/cov/single/loop.zip
llvm-users/chapuni/cov/single/loop.tar.gz
llvm-users/chapuni/cov/single/loop.tar.bz2
Merge branch 'users/chapuni/cov/single/base' into users/chapuni/cov/single/loop [users/chapuni/cov/single/loop]
Conflicts: clang/lib/CodeGen/CoverageMappingGen.cpp
Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r-- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp 27
-rw-r--r-- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp 1
-rw-r--r-- llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h 10
-rw-r--r-- llvm/lib/CodeGen/CodeGenPrepare.cpp 2
-rw-r--r-- llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp 290
-rw-r--r-- llvm/lib/CodeGen/ExpandMemCmp.cpp 28
-rw-r--r-- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp 45
-rw-r--r-- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp 82
-rw-r--r-- llvm/lib/CodeGen/GlobalISel/Utils.cpp 15
-rw-r--r-- llvm/lib/CodeGen/LiveRegMatrix.cpp 2
-rw-r--r-- llvm/lib/CodeGen/MIRSampleProfile.cpp 5
-rw-r--r-- llvm/lib/CodeGen/MachineBlockPlacement.cpp 2
-rw-r--r-- llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp 2
-rw-r--r-- llvm/lib/CodeGen/MachineOperand.cpp 5
-rw-r--r-- llvm/lib/CodeGen/MachineRegisterInfo.cpp 8
-rw-r--r-- llvm/lib/CodeGen/MachineTraceMetrics.cpp 9
-rw-r--r-- llvm/lib/CodeGen/PostRASchedulerList.cpp 39
-rw-r--r-- llvm/lib/CodeGen/ReachingDefAnalysis.cpp 144
-rw-r--r-- llvm/lib/CodeGen/RegAllocGreedy.cpp 23
-rw-r--r-- llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp 35
-rw-r--r-- llvm/lib/CodeGen/RegAllocPriorityAdvisor.h 14
-rw-r--r-- llvm/lib/CodeGen/RegisterCoalescer.cpp 75
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 60
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp 77
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp 2
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp 6
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp 6
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp 15
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp 3
-rw-r--r-- llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp 4
-rw-r--r-- llvm/lib/CodeGen/SwiftErrorValueTracking.cpp 2
-rw-r--r-- llvm/lib/CodeGen/TargetPassConfig.cpp 6
32 files changed, 779 insertions, 265 deletions
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 7bd3fb3..3ba4590 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -3914,21 +3914,22 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV,
if (isa<ConstantAggregateZero>(CV)) {
StructType *structType;
if (AliasList && (structType = llvm::dyn_cast<StructType>(CV->getType()))) {
- // Handle cases of aliases to direct struct elements
- const StructLayout *Layout = DL.getStructLayout(structType);
- uint64_t SizeSoFar = 0;
- for (unsigned int i = 0, n = structType->getNumElements(); i < n - 1;
- ++i) {
- uint64_t GapToNext = Layout->getElementOffset(i + 1) - SizeSoFar;
- AP.OutStreamer->emitZeros(GapToNext);
- SizeSoFar += GapToNext;
- emitGlobalAliasInline(AP, Offset + SizeSoFar, AliasList);
+ unsigned numElements = {structType->getNumElements()};
+ if (numElements != 0) {
+ // Handle cases of aliases to direct struct elements
+ const StructLayout *Layout = DL.getStructLayout(structType);
+ uint64_t SizeSoFar = 0;
+ for (unsigned int i = 0; i < numElements - 1; ++i) {
+ uint64_t GapToNext = Layout->getElementOffset(i + 1) - SizeSoFar;
+ AP.OutStreamer->emitZeros(GapToNext);
+ SizeSoFar += GapToNext;
+ emitGlobalAliasInline(AP, Offset + SizeSoFar, AliasList);
+ }
+ AP.OutStreamer->emitZeros(Size - SizeSoFar);
+ return;
}
- AP.OutStreamer->emitZeros(Size - SizeSoFar);
- return;
- } else {
- return AP.OutStreamer->emitZeros(Size);
}
+ return AP.OutStreamer->emitZeros(Size);
}
if (isa<UndefValue>(CV))
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index e1291e2..11de4b6 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -3789,6 +3789,7 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
// they depend on addresses, throwing them out and rebuilding them.
setCurrentDWARF5AccelTable(DWARF5AccelTableKind::CU);
CU.constructTypeDIE(RefDie, cast<DICompositeType>(CTy));
+ CU.updateAcceleratorTables(CTy->getScope(), CTy, RefDie);
return;
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 0225654..1632053 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -315,6 +315,11 @@ public:
/// Get context owner's DIE.
DIE *createTypeDIE(const DICompositeType *Ty);
+ /// If this is a named finished type then include it in the list of types for
+ /// the accelerator tables.
+ void updateAcceleratorTables(const DIScope *Context, const DIType *Ty,
+ const DIE &TyDIE);
+
protected:
~DwarfUnit();
@@ -357,11 +362,6 @@ private:
virtual void finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) = 0;
- /// If this is a named finished type then include it in the list of types for
- /// the accelerator tables.
- void updateAcceleratorTables(const DIScope *Context, const DIType *Ty,
- const DIE &TyDIE);
-
virtual bool isDwoUnit() const = 0;
const MCSymbol *getCrossSectionRelativeBaseAddress() const override;
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 5c712e4..ba1b10e 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -152,7 +152,7 @@ static cl::opt<bool>
static cl::opt<bool>
EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true),
- cl::desc("Enable sinkinig and/cmp into branches."));
+ cl::desc("Enable sinking and/cmp into branches."));
static cl::opt<bool> DisableStoreExtract(
"disable-cgp-store-extract", cl::Hidden, cl::init(false),
diff --git a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
index f3f7ea9..aec8df9 100644
--- a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
+++ b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
@@ -108,6 +108,13 @@ static bool isNeg(Value *V);
static Value *getNegOperand(Value *V);
namespace {
+template <typename T, typename IterT>
+std::optional<T> findCommonBetweenCollections(IterT A, IterT B) {
+ auto Common = llvm::find_if(A, [B](T I) { return llvm::is_contained(B, I); });
+ if (Common != A.end())
+ return std::make_optional(*Common);
+ return std::nullopt;
+}
class ComplexDeinterleavingLegacyPass : public FunctionPass {
public:
@@ -144,6 +151,7 @@ private:
friend class ComplexDeinterleavingGraph;
using NodePtr = std::shared_ptr<ComplexDeinterleavingCompositeNode>;
using RawNodePtr = ComplexDeinterleavingCompositeNode *;
+ bool OperandsValid = true;
public:
ComplexDeinterleavingOperation Operation;
@@ -160,7 +168,11 @@ public:
SmallVector<RawNodePtr> Operands;
Value *ReplacementNode = nullptr;
- void addOperand(NodePtr Node) { Operands.push_back(Node.get()); }
+ void addOperand(NodePtr Node) {
+ if (!Node || !Node.get())
+ OperandsValid = false;
+ Operands.push_back(Node.get());
+ }
void dump() { dump(dbgs()); }
void dump(raw_ostream &OS) {
@@ -194,6 +206,8 @@ public:
PrintNodeRef(Op);
}
}
+
+ bool areOperandsValid() { return OperandsValid; }
};
class ComplexDeinterleavingGraph {
@@ -293,7 +307,7 @@ private:
NodePtr submitCompositeNode(NodePtr Node) {
CompositeNodes.push_back(Node);
- if (Node->Real && Node->Imag)
+ if (Node->Real)
CachedResult[{Node->Real, Node->Imag}] = Node;
return Node;
}
@@ -327,6 +341,8 @@ private:
/// i: ai - br
NodePtr identifyAdd(Instruction *Real, Instruction *Imag);
NodePtr identifySymmetricOperation(Instruction *Real, Instruction *Imag);
+ NodePtr identifyPartialReduction(Value *R, Value *I);
+ NodePtr identifyDotProduct(Value *Inst);
NodePtr identifyNode(Value *R, Value *I);
@@ -396,6 +412,7 @@ private:
/// * Deinterleave the final value outside of the loop and repurpose original
/// reduction users
void processReductionOperation(Value *OperationReplacement, RawNodePtr Node);
+ void processReductionSingle(Value *OperationReplacement, RawNodePtr Node);
public:
void dump() { dump(dbgs()); }
@@ -891,17 +908,163 @@ ComplexDeinterleavingGraph::identifySymmetricOperation(Instruction *Real,
}
ComplexDeinterleavingGraph::NodePtr
-ComplexDeinterleavingGraph::identifyNode(Value *R, Value *I) {
- LLVM_DEBUG(dbgs() << "identifyNode on " << *R << " / " << *I << "\n");
- assert(R->getType() == I->getType() &&
- "Real and imaginary parts should not have different types");
+ComplexDeinterleavingGraph::identifyDotProduct(Value *V) {
+
+ if (!TL->isComplexDeinterleavingOperationSupported(
+ ComplexDeinterleavingOperation::CDot, V->getType())) {
+ LLVM_DEBUG(dbgs() << "Target doesn't support complex deinterleaving "
+ "operation CDot with the type "
+ << *V->getType() << "\n");
+ return nullptr;
+ }
+
+ auto *Inst = cast<Instruction>(V);
+ auto *RealUser = cast<Instruction>(*Inst->user_begin());
+
+ NodePtr CN =
+ prepareCompositeNode(ComplexDeinterleavingOperation::CDot, Inst, nullptr);
+
+ NodePtr ANode;
+
+ const Intrinsic::ID PartialReduceInt =
+ Intrinsic::experimental_vector_partial_reduce_add;
+
+ Value *AReal = nullptr;
+ Value *AImag = nullptr;
+ Value *BReal = nullptr;
+ Value *BImag = nullptr;
+ Value *Phi = nullptr;
+
+ auto UnwrapCast = [](Value *V) -> Value * {
+ if (auto *CI = dyn_cast<CastInst>(V))
+ return CI->getOperand(0);
+ return V;
+ };
+
+ auto PatternRot0 = m_Intrinsic<PartialReduceInt>(
+ m_Intrinsic<PartialReduceInt>(m_Value(Phi),
+ m_Mul(m_Value(BReal), m_Value(AReal))),
+ m_Neg(m_Mul(m_Value(BImag), m_Value(AImag))));
+
+ auto PatternRot270 = m_Intrinsic<PartialReduceInt>(
+ m_Intrinsic<PartialReduceInt>(
+ m_Value(Phi), m_Neg(m_Mul(m_Value(BReal), m_Value(AImag)))),
+ m_Mul(m_Value(BImag), m_Value(AReal)));
+
+ if (match(Inst, PatternRot0)) {
+ CN->Rotation = ComplexDeinterleavingRotation::Rotation_0;
+ } else if (match(Inst, PatternRot270)) {
+ CN->Rotation = ComplexDeinterleavingRotation::Rotation_270;
+ } else {
+ Value *A0, *A1;
+ // The rotations 90 and 180 share the same operation pattern, so inspect the
+ // order of the operands, identifying where the real and imaginary
+ // components of A go, to discern between the aforementioned rotations.
+ auto PatternRot90Rot180 = m_Intrinsic<PartialReduceInt>(
+ m_Intrinsic<PartialReduceInt>(m_Value(Phi),
+ m_Mul(m_Value(BReal), m_Value(A0))),
+ m_Mul(m_Value(BImag), m_Value(A1)));
+
+ if (!match(Inst, PatternRot90Rot180))
+ return nullptr;
+
+ A0 = UnwrapCast(A0);
+ A1 = UnwrapCast(A1);
+
+ // Test if A0 is real/A1 is imag
+ ANode = identifyNode(A0, A1);
+ if (!ANode) {
+ // Test if A0 is imag/A1 is real
+ ANode = identifyNode(A1, A0);
+ // Unable to identify operand components, thus unable to identify rotation
+ if (!ANode)
+ return nullptr;
+ CN->Rotation = ComplexDeinterleavingRotation::Rotation_90;
+ AReal = A1;
+ AImag = A0;
+ } else {
+ AReal = A0;
+ AImag = A1;
+ CN->Rotation = ComplexDeinterleavingRotation::Rotation_180;
+ }
+ }
+
+ AReal = UnwrapCast(AReal);
+ AImag = UnwrapCast(AImag);
+ BReal = UnwrapCast(BReal);
+ BImag = UnwrapCast(BImag);
+
+ VectorType *VTy = cast<VectorType>(V->getType());
+ Type *ExpectedOperandTy = VectorType::getSubdividedVectorType(VTy, 2);
+ if (AReal->getType() != ExpectedOperandTy)
+ return nullptr;
+ if (AImag->getType() != ExpectedOperandTy)
+ return nullptr;
+ if (BReal->getType() != ExpectedOperandTy)
+ return nullptr;
+ if (BImag->getType() != ExpectedOperandTy)
+ return nullptr;
+
+ if (Phi->getType() != VTy && RealUser->getType() != VTy)
+ return nullptr;
+
+ NodePtr Node = identifyNode(AReal, AImag);
+
+ // In the case that a node was identified to figure out the rotation, ensure
+ // that trying to identify a node with AReal and AImag post-unwrap results in
+ // the same node
+ if (ANode && Node != ANode) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Identified node is different from previously identified node. "
+ "Unable to confidently generate a complex operation node\n");
+ return nullptr;
+ }
+
+ CN->addOperand(Node);
+ CN->addOperand(identifyNode(BReal, BImag));
+ CN->addOperand(identifyNode(Phi, RealUser));
+
+ return submitCompositeNode(CN);
+}
+
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifyPartialReduction(Value *R, Value *I) {
+ // Partial reductions don't support non-vector types, so check these first
+ if (!isa<VectorType>(R->getType()) || !isa<VectorType>(I->getType()))
+ return nullptr;
+
+ auto CommonUser =
+ findCommonBetweenCollections<Value *>(R->users(), I->users());
+ if (!CommonUser)
+ return nullptr;
+
+ auto *IInst = dyn_cast<IntrinsicInst>(*CommonUser);
+ if (!IInst || IInst->getIntrinsicID() !=
+ Intrinsic::experimental_vector_partial_reduce_add)
+ return nullptr;
+
+ if (NodePtr CN = identifyDotProduct(IInst))
+ return CN;
+
+ return nullptr;
+}
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifyNode(Value *R, Value *I) {
auto It = CachedResult.find({R, I});
if (It != CachedResult.end()) {
LLVM_DEBUG(dbgs() << " - Folding to existing node\n");
return It->second;
}
+ if (NodePtr CN = identifyPartialReduction(R, I))
+ return CN;
+
+ bool IsReduction = RealPHI == R && (!ImagPHI || ImagPHI == I);
+ if (!IsReduction && R->getType() != I->getType())
+ return nullptr;
+
if (NodePtr CN = identifySplat(R, I))
return CN;
@@ -1427,12 +1590,20 @@ bool ComplexDeinterleavingGraph::identifyNodes(Instruction *RootI) {
if (It != RootToNode.end()) {
auto RootNode = It->second;
assert(RootNode->Operation ==
- ComplexDeinterleavingOperation::ReductionOperation);
+ ComplexDeinterleavingOperation::ReductionOperation ||
+ RootNode->Operation ==
+ ComplexDeinterleavingOperation::ReductionSingle);
// Find out which part, Real or Imag, comes later, and only if we come to
// the latest part, add it to OrderedRoots.
auto *R = cast<Instruction>(RootNode->Real);
- auto *I = cast<Instruction>(RootNode->Imag);
- auto *ReplacementAnchor = R->comesBefore(I) ? I : R;
+ auto *I = RootNode->Imag ? cast<Instruction>(RootNode->Imag) : nullptr;
+
+ Instruction *ReplacementAnchor;
+ if (I)
+ ReplacementAnchor = R->comesBefore(I) ? I : R;
+ else
+ ReplacementAnchor = R;
+
if (ReplacementAnchor != RootI)
return false;
OrderedRoots.push_back(RootI);
@@ -1523,7 +1694,6 @@ void ComplexDeinterleavingGraph::identifyReductionNodes() {
for (size_t j = i + 1; j < OperationInstruction.size(); ++j) {
if (Processed[j])
continue;
-
auto *Real = OperationInstruction[i];
auto *Imag = OperationInstruction[j];
if (Real->getType() != Imag->getType())
@@ -1556,6 +1726,28 @@ void ComplexDeinterleavingGraph::identifyReductionNodes() {
break;
}
}
+
+ auto *Real = OperationInstruction[i];
+ // We want to check that we have 2 operands, but the function attributes
+ // being counted as operands bloats this value.
+ if (Real->getNumOperands() < 2)
+ continue;
+
+ RealPHI = ReductionInfo[Real].first;
+ ImagPHI = nullptr;
+ PHIsFound = false;
+ auto Node = identifyNode(Real->getOperand(0), Real->getOperand(1));
+ if (Node && PHIsFound) {
+ LLVM_DEBUG(
+ dbgs() << "Identified single reduction starting from instruction: "
+ << *Real << "/" << *ReductionInfo[Real].second << "\n");
+ Processed[i] = true;
+ auto RootNode = prepareCompositeNode(
+ ComplexDeinterleavingOperation::ReductionSingle, Real, nullptr);
+ RootNode->addOperand(Node);
+ RootToNode[Real] = RootNode;
+ submitCompositeNode(RootNode);
+ }
}
RealPHI = nullptr;
@@ -1563,6 +1755,24 @@ void ComplexDeinterleavingGraph::identifyReductionNodes() {
}
bool ComplexDeinterleavingGraph::checkNodes() {
+
+ bool FoundDeinterleaveNode = false;
+ for (NodePtr N : CompositeNodes) {
+ if (!N->areOperandsValid())
+ return false;
+ if (N->Operation == ComplexDeinterleavingOperation::Deinterleave)
+ FoundDeinterleaveNode = true;
+ }
+
+ // We need a deinterleave node in order to guarantee that we're working with
+ // complex numbers.
+ if (!FoundDeinterleaveNode) {
+ LLVM_DEBUG(
+ dbgs() << "Couldn't find a deinterleave node within the graph, cannot "
+ "guarantee safety during graph transformation.\n");
+ return false;
+ }
+
// Collect all instructions from roots to leaves
SmallPtrSet<Instruction *, 16> AllInstructions;
SmallVector<Instruction *, 8> Worklist;
@@ -1831,7 +2041,7 @@ ComplexDeinterleavingGraph::identifySplat(Value *R, Value *I) {
ComplexDeinterleavingGraph::NodePtr
ComplexDeinterleavingGraph::identifyPHINode(Instruction *Real,
Instruction *Imag) {
- if (Real != RealPHI || Imag != ImagPHI)
+ if (Real != RealPHI || (ImagPHI && Imag != ImagPHI))
return nullptr;
PHIsFound = true;
@@ -1926,6 +2136,16 @@ Value *ComplexDeinterleavingGraph::replaceNode(IRBuilderBase &Builder,
Value *ReplacementNode;
switch (Node->Operation) {
+ case ComplexDeinterleavingOperation::CDot: {
+ Value *Input0 = ReplaceOperandIfExist(Node, 0);
+ Value *Input1 = ReplaceOperandIfExist(Node, 1);
+ Value *Accumulator = ReplaceOperandIfExist(Node, 2);
+ assert(!Input1 || (Input0->getType() == Input1->getType() &&
+ "Node inputs need to be of the same type"));
+ ReplacementNode = TL->createComplexDeinterleavingIR(
+ Builder, Node->Operation, Node->Rotation, Input0, Input1, Accumulator);
+ break;
+ }
case ComplexDeinterleavingOperation::CAdd:
case ComplexDeinterleavingOperation::CMulPartial:
case ComplexDeinterleavingOperation::Symmetric: {
@@ -1969,13 +2189,18 @@ Value *ComplexDeinterleavingGraph::replaceNode(IRBuilderBase &Builder,
case ComplexDeinterleavingOperation::ReductionPHI: {
// If Operation is ReductionPHI, a new empty PHINode is created.
// It is filled later when the ReductionOperation is processed.
+ auto *OldPHI = cast<PHINode>(Node->Real);
auto *VTy = cast<VectorType>(Node->Real->getType());
auto *NewVTy = VectorType::getDoubleElementsVectorType(VTy);
auto *NewPHI = PHINode::Create(NewVTy, 0, "", BackEdge->getFirstNonPHIIt());
- OldToNewPHI[dyn_cast<PHINode>(Node->Real)] = NewPHI;
+ OldToNewPHI[OldPHI] = NewPHI;
ReplacementNode = NewPHI;
break;
}
+ case ComplexDeinterleavingOperation::ReductionSingle:
+ ReplacementNode = replaceNode(Builder, Node->Operands[0]);
+ processReductionSingle(ReplacementNode, Node);
+ break;
case ComplexDeinterleavingOperation::ReductionOperation:
ReplacementNode = replaceNode(Builder, Node->Operands[0]);
processReductionOperation(ReplacementNode, Node);
@@ -2000,6 +2225,38 @@ Value *ComplexDeinterleavingGraph::replaceNode(IRBuilderBase &Builder,
return ReplacementNode;
}
+void ComplexDeinterleavingGraph::processReductionSingle(
+ Value *OperationReplacement, RawNodePtr Node) {
+ auto *Real = cast<Instruction>(Node->Real);
+ auto *OldPHI = ReductionInfo[Real].first;
+ auto *NewPHI = OldToNewPHI[OldPHI];
+ auto *VTy = cast<VectorType>(Real->getType());
+ auto *NewVTy = VectorType::getDoubleElementsVectorType(VTy);
+
+ Value *Init = OldPHI->getIncomingValueForBlock(Incoming);
+
+ IRBuilder<> Builder(Incoming->getTerminator());
+
+ Value *NewInit = nullptr;
+ if (auto *C = dyn_cast<Constant>(Init)) {
+ if (C->isZeroValue())
+ NewInit = Constant::getNullValue(NewVTy);
+ }
+
+ if (!NewInit)
+ NewInit = Builder.CreateIntrinsic(Intrinsic::vector_interleave2, NewVTy,
+ {Init, Constant::getNullValue(VTy)});
+
+ NewPHI->addIncoming(NewInit, Incoming);
+ NewPHI->addIncoming(OperationReplacement, BackEdge);
+
+ auto *FinalReduction = ReductionInfo[Real].second;
+ Builder.SetInsertPoint(&*FinalReduction->getParent()->getFirstInsertionPt());
+
+ auto *AddReduce = Builder.CreateAddReduce(OperationReplacement);
+ FinalReduction->replaceAllUsesWith(AddReduce);
+}
+
void ComplexDeinterleavingGraph::processReductionOperation(
Value *OperationReplacement, RawNodePtr Node) {
auto *Real = cast<Instruction>(Node->Real);
@@ -2059,8 +2316,13 @@ void ComplexDeinterleavingGraph::replaceNodes() {
auto *RootImag = cast<Instruction>(RootNode->Imag);
ReductionInfo[RootReal].first->removeIncomingValue(BackEdge);
ReductionInfo[RootImag].first->removeIncomingValue(BackEdge);
- DeadInstrRoots.push_back(cast<Instruction>(RootReal));
- DeadInstrRoots.push_back(cast<Instruction>(RootImag));
+ DeadInstrRoots.push_back(RootReal);
+ DeadInstrRoots.push_back(RootImag);
+ } else if (RootNode->Operation ==
+ ComplexDeinterleavingOperation::ReductionSingle) {
+ auto *RootInst = cast<Instruction>(RootNode->Real);
+ ReductionInfo[RootInst].first->removeIncomingValue(BackEdge);
+ DeadInstrRoots.push_back(ReductionInfo[RootInst].second);
} else {
assert(R && "Unable to find replacement for RootInstruction");
DeadInstrRoots.push_back(RootInstruction);
diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp
index f8ca7e3..74f93e1 100644
--- a/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -669,17 +669,25 @@ Value *MemCmpExpansion::getMemCmpOneBlock() {
if (CI->hasOneUser()) {
auto *UI = cast<Instruction>(*CI->user_begin());
CmpPredicate Pred = ICmpInst::Predicate::BAD_ICMP_PREDICATE;
- uint64_t Shift;
bool NeedsZExt = false;
// This is a special case because instead of checking if the result is less
// than zero:
// bool result = memcmp(a, b, NBYTES) < 0;
// Compiler is clever enough to generate the following code:
// bool result = memcmp(a, b, NBYTES) >> 31;
- if (match(UI, m_LShr(m_Value(), m_ConstantInt(Shift))) &&
- Shift == (CI->getType()->getIntegerBitWidth() - 1)) {
+ if (match(UI,
+ m_LShr(m_Value(),
+ m_SpecificInt(CI->getType()->getIntegerBitWidth() - 1)))) {
Pred = ICmpInst::ICMP_SLT;
NeedsZExt = true;
+ } else if (match(UI, m_SpecificICmp(ICmpInst::ICMP_SGT, m_Specific(CI),
+ m_AllOnes()))) {
+ // Adjust predicate as if it compared with 0.
+ Pred = ICmpInst::ICMP_SGE;
+ } else if (match(UI, m_SpecificICmp(ICmpInst::ICMP_SLT, m_Specific(CI),
+ m_One()))) {
+ // Adjust predicate as if it compared with 0.
+ Pred = ICmpInst::ICMP_SLE;
} else {
// In case of a successful match this call will set `Pred` variable
match(UI, m_ICmp(Pred, m_Specific(CI), m_Zero()));
@@ -696,17 +704,9 @@ Value *MemCmpExpansion::getMemCmpOneBlock() {
}
}
- // The result of memcmp is negative, zero, or positive, so produce that by
- // subtracting 2 extended compare bits: sub (ugt, ult).
- // If a target prefers to use selects to get -1/0/1, they should be able
- // to transform this later. The inverse transform (going from selects to math)
- // may not be possible in the DAG because the selects got converted into
- // branches before we got there.
- Value *CmpUGT = Builder.CreateICmpUGT(Loads.Lhs, Loads.Rhs);
- Value *CmpULT = Builder.CreateICmpULT(Loads.Lhs, Loads.Rhs);
- Value *ZextUGT = Builder.CreateZExt(CmpUGT, Builder.getInt32Ty());
- Value *ZextULT = Builder.CreateZExt(CmpULT, Builder.getInt32Ty());
- return Builder.CreateSub(ZextUGT, ZextULT);
+ // The result of memcmp is negative, zero, or positive.
+ return Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::ucmp,
+ {Loads.Lhs, Loads.Rhs});
}
// This function expands the memcmp call into an inline expansion and returns
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index c20e9d0..4e3aaf5d 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -6864,6 +6864,23 @@ bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select,
};
return true;
}
+
+ // select Cond, 0, Pow2 --> (zext (!Cond)) << log2(Pow2)
+ if (FalseValue.isPowerOf2() && TrueValue.isZero()) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.setInstrAndDebugLoc(*Select);
+ Register Not = MRI.createGenericVirtualRegister(CondTy);
+ B.buildNot(Not, Cond);
+ Register Inner = MRI.createGenericVirtualRegister(TrueTy);
+ B.buildZExtOrTrunc(Inner, Not);
+ // The shift amount must be scalar.
+ LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
+ auto ShAmtC = B.buildConstant(ShiftTy, FalseValue.exactLogBase2());
+ B.buildShl(Dest, Inner, ShAmtC, Flags);
+ };
+ return true;
+ }
+
// select Cond, -1, C --> or (sext Cond), C
if (TrueValue.isAllOnes()) {
MatchInfo = [=](MachineIRBuilder &B) {
@@ -7045,6 +7062,34 @@ bool CombinerHelper::matchSelectIMinMax(const MachineOperand &MO,
}
}
+// (neg (min/max x, (neg x))) --> (max/min x, (neg x))
+bool CombinerHelper::matchSimplifyNegMinMax(MachineInstr &MI,
+ BuildFnTy &MatchInfo) const {
+ assert(MI.getOpcode() == TargetOpcode::G_SUB);
+ Register DestReg = MI.getOperand(0).getReg();
+ LLT DestTy = MRI.getType(DestReg);
+
+ Register X;
+ Register Sub0;
+ auto NegPattern = m_all_of(m_Neg(m_DeferredReg(X)), m_Reg(Sub0));
+ if (mi_match(DestReg, MRI,
+ m_Neg(m_OneUse(m_any_of(m_GSMin(m_Reg(X), NegPattern),
+ m_GSMax(m_Reg(X), NegPattern),
+ m_GUMin(m_Reg(X), NegPattern),
+ m_GUMax(m_Reg(X), NegPattern)))))) {
+ MachineInstr *MinMaxMI = MRI.getVRegDef(MI.getOperand(2).getReg());
+ unsigned NewOpc = getInverseGMinMaxOpcode(MinMaxMI->getOpcode());
+ if (isLegal({NewOpc, {DestTy}})) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildInstr(NewOpc, {DestReg}, {X, Sub0});
+ };
+ return true;
+ }
+ }
+
+ return false;
+}
+
bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) const {
GSelect *Select = cast<GSelect>(&MI);
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index e2247f7..d0a6234 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -3022,8 +3023,19 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
return UnableToLegalize;
LLT Ty = MRI.getType(MI.getOperand(0).getReg());
- if (!Ty.isScalar())
- return UnableToLegalize;
+ assert(!Ty.isPointerOrPointerVector() && "Can't widen type");
+ if (!Ty.isScalar()) {
+ // We need to widen the vector element type.
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
+ // We also need to adjust the MMO to turn this into a truncating store.
+ MachineMemOperand &MMO = **MI.memoperands_begin();
+ MachineFunction &MF = MIRBuilder.getMF();
+ auto *NewMMO = MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), Ty);
+ MI.setMemRefs(MF, {NewMMO});
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
Observer.changingInstr(MI);
@@ -4106,10 +4118,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
unsigned StoreWidth = MemTy.getSizeInBits();
unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
- if (StoreWidth != StoreSizeInBits) {
- if (SrcTy.isVector())
- return UnableToLegalize;
-
+ if (StoreWidth != StoreSizeInBits && !SrcTy.isVector()) {
// Promote to a byte-sized store with upper bits zero if not
// storing an integral number of bytes. For example, promote
// TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
@@ -4131,9 +4140,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
}
if (MemTy.isVector()) {
- // TODO: Handle vector trunc stores
if (MemTy != SrcTy)
- return UnableToLegalize;
+ return scalarizeVectorBooleanStore(StoreMI);
// TODO: We can do better than scalarizing the vector and at least split it
// in half.
@@ -4189,6 +4197,50 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
}
LegalizerHelper::LegalizeResult
+LegalizerHelper::scalarizeVectorBooleanStore(GStore &StoreMI) {
+ Register SrcReg = StoreMI.getValueReg();
+ Register PtrReg = StoreMI.getPointerReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ MachineMemOperand &MMO = **StoreMI.memoperands_begin();
+ LLT MemTy = MMO.getMemoryType();
+ LLT MemScalarTy = MemTy.getElementType();
+ MachineFunction &MF = MIRBuilder.getMF();
+
+ assert(SrcTy.isVector() && "Expect a vector store type");
+
+ if (!MemScalarTy.isByteSized()) {
+ // We need to build an integer scalar of the vector bit pattern.
+ // It's not legal for us to add padding when storing a vector.
+ unsigned NumBits = MemTy.getSizeInBits();
+ LLT IntTy = LLT::scalar(NumBits);
+ auto CurrVal = MIRBuilder.buildConstant(IntTy, 0);
+ LLT IdxTy = getLLTForMVT(TLI.getVectorIdxTy(MF.getDataLayout()));
+
+ for (unsigned I = 0, E = MemTy.getNumElements(); I < E; ++I) {
+ auto Elt = MIRBuilder.buildExtractVectorElement(
+ SrcTy.getElementType(), SrcReg, MIRBuilder.buildConstant(IdxTy, I));
+ auto Trunc = MIRBuilder.buildTrunc(MemScalarTy, Elt);
+ auto ZExt = MIRBuilder.buildZExt(IntTy, Trunc);
+ unsigned ShiftIntoIdx = MF.getDataLayout().isBigEndian()
+ ? (MemTy.getNumElements() - 1) - I
+ : I;
+ auto ShiftAmt = MIRBuilder.buildConstant(
+ IntTy, ShiftIntoIdx * MemScalarTy.getSizeInBits());
+ auto Shifted = MIRBuilder.buildShl(IntTy, ZExt, ShiftAmt);
+ CurrVal = MIRBuilder.buildOr(IntTy, CurrVal, Shifted);
+ }
+ auto PtrInfo = MMO.getPointerInfo();
+ auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, IntTy);
+ MIRBuilder.buildStore(CurrVal, PtrReg, *NewMMO);
+ StoreMI.eraseFromParent();
+ return Legalized;
+ }
+
+ // TODO: implement simple scalarization.
+ return UnableToLegalize;
+}
+
+LegalizerHelper::LegalizeResult
LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
switch (MI.getOpcode()) {
case TargetOpcode::G_LOAD: {
@@ -4653,6 +4705,20 @@ LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
}
+MachineInstrBuilder LegalizerHelper::createStackStoreLoad(const DstOp &Res,
+ const SrcOp &Val) {
+ LLT SrcTy = Val.getLLTTy(MRI);
+ Align StackTypeAlign =
+ std::max(getStackTemporaryAlignment(SrcTy),
+ getStackTemporaryAlignment(Res.getLLTTy(MRI)));
+ MachinePointerInfo PtrInfo;
+ auto StackTemp =
+ createStackTemporary(SrcTy.getSizeInBytes(), StackTypeAlign, PtrInfo);
+
+ MIRBuilder.buildStore(Val, StackTemp, PtrInfo, StackTypeAlign);
+ return MIRBuilder.buildLoad(Res, StackTemp, PtrInfo, StackTypeAlign);
+}
+
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg,
LLT VecTy) {
LLT IdxTy = B.getMRI()->getType(IdxReg);
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 7938293..625d556 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -276,6 +276,21 @@ void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC,
reportGISelFailure(MF, TPC, MORE, R);
}
+unsigned llvm::getInverseGMinMaxOpcode(unsigned MinMaxOpc) {
+ switch (MinMaxOpc) {
+ case TargetOpcode::G_SMIN:
+ return TargetOpcode::G_SMAX;
+ case TargetOpcode::G_SMAX:
+ return TargetOpcode::G_SMIN;
+ case TargetOpcode::G_UMIN:
+ return TargetOpcode::G_UMAX;
+ case TargetOpcode::G_UMAX:
+ return TargetOpcode::G_UMIN;
+ default:
+ llvm_unreachable("unrecognized opcode");
+ }
+}
+
std::optional<APInt> llvm::getIConstantVRegVal(Register VReg,
const MachineRegisterInfo &MRI) {
std::optional<ValueAndVReg> ValAndVReg = getIConstantVRegValWithLookThrough(
diff --git a/llvm/lib/CodeGen/LiveRegMatrix.cpp b/llvm/lib/CodeGen/LiveRegMatrix.cpp
index 9744c47..3367171 100644
--- a/llvm/lib/CodeGen/LiveRegMatrix.cpp
+++ b/llvm/lib/CodeGen/LiveRegMatrix.cpp
@@ -66,7 +66,7 @@ void LiveRegMatrix::init(MachineFunction &MF, LiveIntervals &pLIS,
unsigned NumRegUnits = TRI->getNumRegUnits();
if (NumRegUnits != Matrix.size())
Queries.reset(new LiveIntervalUnion::Query[NumRegUnits]);
- Matrix.init(LIUAlloc, NumRegUnits);
+ Matrix.init(*LIUAlloc, NumRegUnits);
// Make sure no stale queries get reused.
invalidateVirtRegs();
diff --git a/llvm/lib/CodeGen/MIRSampleProfile.cpp b/llvm/lib/CodeGen/MIRSampleProfile.cpp
index 23db09b..9bba50e8 100644
--- a/llvm/lib/CodeGen/MIRSampleProfile.cpp
+++ b/llvm/lib/CodeGen/MIRSampleProfile.cpp
@@ -46,8 +46,9 @@ static cl::opt<bool> ShowFSBranchProb(
cl::desc("Print setting flow sensitive branch probabilities"));
static cl::opt<unsigned> FSProfileDebugProbDiffThreshold(
"fs-profile-debug-prob-diff-threshold", cl::init(10),
- cl::desc("Only show debug message if the branch probility is greater than "
- "this value (in percentage)."));
+ cl::desc(
+ "Only show debug message if the branch probability is greater than "
+ "this value (in percentage)."));
static cl::opt<unsigned> FSProfileDebugBWThreshold(
"fs-profile-debug-bw-threshold", cl::init(10000),
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 0f68313..05bc4cf 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -149,7 +149,7 @@ static cl::opt<unsigned> JumpInstCost("jump-inst-cost",
static cl::opt<bool>
TailDupPlacement("tail-dup-placement",
cl::desc("Perform tail duplication during placement. "
- "Creates more fallthrough opportunites in "
+ "Creates more fallthrough opportunities in "
"outline branches."),
cl::init(true), cl::Hidden);
diff --git a/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
index 56fffff..2e92dd8 100644
--- a/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
+++ b/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -29,7 +29,7 @@ namespace llvm {
cl::opt<unsigned>
StaticLikelyProb("static-likely-prob",
cl::desc("branch probability threshold in percentage"
- "to be considered very likely"),
+ " to be considered very likely"),
cl::init(80), cl::Hidden);
cl::opt<unsigned> ProfileLikelyProb(
diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp
index 3a9bdde..5c9ca91 100644
--- a/llvm/lib/CodeGen/MachineOperand.cpp
+++ b/llvm/lib/CodeGen/MachineOperand.cpp
@@ -1170,6 +1170,9 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (getFlags() & MachineMemOperand::MOTargetFlag3)
OS << '"' << getTargetMMOFlagName(*TII, MachineMemOperand::MOTargetFlag3)
<< "\" ";
+ if (getFlags() & MachineMemOperand::MOTargetFlag4)
+ OS << '"' << getTargetMMOFlagName(*TII, MachineMemOperand::MOTargetFlag4)
+ << "\" ";
} else {
if (getFlags() & MachineMemOperand::MOTargetFlag1)
OS << "\"MOTargetFlag1\" ";
@@ -1177,6 +1180,8 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << "\"MOTargetFlag2\" ";
if (getFlags() & MachineMemOperand::MOTargetFlag3)
OS << "\"MOTargetFlag3\" ";
+ if (getFlags() & MachineMemOperand::MOTargetFlag4)
+ OS << "\"MOTargetFlag4\" ";
}
assert((isLoad() || isStore()) &&
diff --git a/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index 6f636a1..394b99b 100644
--- a/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -407,9 +407,11 @@ void MachineRegisterInfo::replaceRegWith(Register FromReg, Register ToReg) {
MachineInstr *MachineRegisterInfo::getVRegDef(Register Reg) const {
// Since we are in SSA form, we can use the first definition.
def_instr_iterator I = def_instr_begin(Reg);
- assert((I.atEnd() || std::next(I) == def_instr_end()) &&
- "getVRegDef assumes a single definition or no definition");
- return !I.atEnd() ? &*I : nullptr;
+ if (I == def_instr_end())
+ return nullptr;
+ assert(std::next(I) == def_instr_end() &&
+ "getVRegDef assumes at most one definition");
+ return &*I;
}
/// getUniqueVRegDef - Return the unique machine instr that defines the
diff --git a/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/llvm/lib/CodeGen/MachineTraceMetrics.cpp
index 6576f97..021c1a0 100644
--- a/llvm/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/llvm/lib/CodeGen/MachineTraceMetrics.cpp
@@ -683,11 +683,10 @@ struct DataDep {
DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp)
: UseOp(UseOp) {
assert(Register::isVirtualRegister(VirtReg));
- MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg);
- assert(!DefI.atEnd() && "Register has no defs");
- DefMI = DefI->getParent();
- DefOp = DefI.getOperandNo();
- assert((++DefI).atEnd() && "Register has multiple defs");
+ MachineOperand *DefMO = MRI->getOneDef(VirtReg);
+ assert(DefMO && "Register does not have unique def");
+ DefMI = DefMO->getParent();
+ DefOp = DefMO->getOperandNo();
}
};
diff --git a/llvm/lib/CodeGen/PostRASchedulerList.cpp b/llvm/lib/CodeGen/PostRASchedulerList.cpp
index 2f7cfdd..badfd9a6 100644
--- a/llvm/lib/CodeGen/PostRASchedulerList.cpp
+++ b/llvm/lib/CodeGen/PostRASchedulerList.cpp
@@ -98,12 +98,6 @@ namespace {
}
bool runOnMachineFunction(MachineFunction &Fn) override;
-
- private:
- bool enablePostRAScheduler(
- const TargetSubtargetInfo &ST, CodeGenOptLevel OptLevel,
- TargetSubtargetInfo::AntiDepBreakMode &Mode,
- TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const;
};
char PostRAScheduler::ID = 0;
@@ -259,13 +253,8 @@ LLVM_DUMP_METHOD void SchedulePostRATDList::dumpSchedule() const {
}
#endif
-bool PostRAScheduler::enablePostRAScheduler(
- const TargetSubtargetInfo &ST, CodeGenOptLevel OptLevel,
- TargetSubtargetInfo::AntiDepBreakMode &Mode,
- TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const {
- Mode = ST.getAntiDepBreakMode();
- ST.getCriticalPathRCs(CriticalPathRCs);
-
+static bool enablePostRAScheduler(const TargetSubtargetInfo &ST,
+ CodeGenOptLevel OptLevel) {
// Check for explicit enable/disable of post-ra scheduling.
if (EnablePostRAScheduler.getPosition() > 0)
return EnablePostRAScheduler;
@@ -278,24 +267,17 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
if (skipFunction(Fn.getFunction()))
return false;
- TII = Fn.getSubtarget().getInstrInfo();
- MachineLoopInfo &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
- AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ const auto &Subtarget = Fn.getSubtarget();
TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
-
- RegClassInfo.runOnMachineFunction(Fn);
-
- TargetSubtargetInfo::AntiDepBreakMode AntiDepMode =
- TargetSubtargetInfo::ANTIDEP_NONE;
- SmallVector<const TargetRegisterClass*, 4> CriticalPathRCs;
-
// Check that post-RA scheduling is enabled for this target.
- // This may upgrade the AntiDepMode.
- if (!enablePostRAScheduler(Fn.getSubtarget(), PassConfig->getOptLevel(),
- AntiDepMode, CriticalPathRCs))
+ if (!enablePostRAScheduler(Subtarget, PassConfig->getOptLevel()))
return false;
- // Check for antidep breaking override...
+ TII = Subtarget.getInstrInfo();
+ MachineLoopInfo &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
+ AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ TargetSubtargetInfo::AntiDepBreakMode AntiDepMode =
+ Subtarget.getAntiDepBreakMode();
if (EnableAntiDepBreaking.getPosition() > 0) {
AntiDepMode = (EnableAntiDepBreaking == "all")
? TargetSubtargetInfo::ANTIDEP_ALL
@@ -303,6 +285,9 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
? TargetSubtargetInfo::ANTIDEP_CRITICAL
: TargetSubtargetInfo::ANTIDEP_NONE);
}
+ SmallVector<const TargetRegisterClass *, 4> CriticalPathRCs;
+ Subtarget.getCriticalPathRCs(CriticalPathRCs);
+ RegClassInfo.runOnMachineFunction(Fn);
LLVM_DEBUG(dbgs() << "PostRAScheduler\n");
diff --git a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
index 79b0fa6..3ab6315 100644
--- a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
+++ b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
@@ -30,22 +30,22 @@ static bool isValidRegUse(const MachineOperand &MO) {
return isValidReg(MO) && MO.isUse();
}
-static bool isValidRegUseOf(const MachineOperand &MO, MCRegister PhysReg,
+static bool isValidRegUseOf(const MachineOperand &MO, MCRegister Reg,
const TargetRegisterInfo *TRI) {
if (!isValidRegUse(MO))
return false;
- return TRI->regsOverlap(MO.getReg(), PhysReg);
+ return TRI->regsOverlap(MO.getReg(), Reg);
}
static bool isValidRegDef(const MachineOperand &MO) {
return isValidReg(MO) && MO.isDef();
}
-static bool isValidRegDefOf(const MachineOperand &MO, MCRegister PhysReg,
+static bool isValidRegDefOf(const MachineOperand &MO, MCRegister Reg,
const TargetRegisterInfo *TRI) {
if (!isValidRegDef(MO))
return false;
- return TRI->regsOverlap(MO.getReg(), PhysReg);
+ return TRI->regsOverlap(MO.getReg(), Reg);
}
void ReachingDefAnalysis::enterBasicBlock(MachineBasicBlock *MBB) {
@@ -261,7 +261,7 @@ void ReachingDefAnalysis::traverse() {
}
int ReachingDefAnalysis::getReachingDef(MachineInstr *MI,
- MCRegister PhysReg) const {
+ MCRegister Reg) const {
assert(InstIds.count(MI) && "Unexpected machine instuction.");
int InstId = InstIds.lookup(MI);
int DefRes = ReachingDefDefaultVal;
@@ -269,7 +269,7 @@ int ReachingDefAnalysis::getReachingDef(MachineInstr *MI,
assert(MBBNumber < MBBReachingDefs.numBlockIDs() &&
"Unexpected basic block number.");
int LatestDef = ReachingDefDefaultVal;
- for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ for (MCRegUnit Unit : TRI->regunits(Reg)) {
for (int Def : MBBReachingDefs.defs(MBBNumber, Unit)) {
if (Def >= InstId)
break;
@@ -280,22 +280,21 @@ int ReachingDefAnalysis::getReachingDef(MachineInstr *MI,
return LatestDef;
}
-MachineInstr *
-ReachingDefAnalysis::getReachingLocalMIDef(MachineInstr *MI,
- MCRegister PhysReg) const {
- return hasLocalDefBefore(MI, PhysReg)
- ? getInstFromId(MI->getParent(), getReachingDef(MI, PhysReg))
- : nullptr;
+MachineInstr *ReachingDefAnalysis::getReachingLocalMIDef(MachineInstr *MI,
+ MCRegister Reg) const {
+ return hasLocalDefBefore(MI, Reg)
+ ? getInstFromId(MI->getParent(), getReachingDef(MI, Reg))
+ : nullptr;
}
bool ReachingDefAnalysis::hasSameReachingDef(MachineInstr *A, MachineInstr *B,
- MCRegister PhysReg) const {
+ MCRegister Reg) const {
MachineBasicBlock *ParentA = A->getParent();
MachineBasicBlock *ParentB = B->getParent();
if (ParentA != ParentB)
return false;
- return getReachingDef(A, PhysReg) == getReachingDef(B, PhysReg);
+ return getReachingDef(A, Reg) == getReachingDef(B, Reg);
}
MachineInstr *ReachingDefAnalysis::getInstFromId(MachineBasicBlock *MBB,
@@ -318,19 +317,18 @@ MachineInstr *ReachingDefAnalysis::getInstFromId(MachineBasicBlock *MBB,
return nullptr;
}
-int ReachingDefAnalysis::getClearance(MachineInstr *MI,
- MCRegister PhysReg) const {
+int ReachingDefAnalysis::getClearance(MachineInstr *MI, MCRegister Reg) const {
assert(InstIds.count(MI) && "Unexpected machine instuction.");
- return InstIds.lookup(MI) - getReachingDef(MI, PhysReg);
+ return InstIds.lookup(MI) - getReachingDef(MI, Reg);
}
bool ReachingDefAnalysis::hasLocalDefBefore(MachineInstr *MI,
- MCRegister PhysReg) const {
- return getReachingDef(MI, PhysReg) >= 0;
+ MCRegister Reg) const {
+ return getReachingDef(MI, Reg) >= 0;
}
void ReachingDefAnalysis::getReachingLocalUses(MachineInstr *Def,
- MCRegister PhysReg,
+ MCRegister Reg,
InstSet &Uses) const {
MachineBasicBlock *MBB = Def->getParent();
MachineBasicBlock::iterator MI = MachineBasicBlock::iterator(Def);
@@ -340,11 +338,11 @@ void ReachingDefAnalysis::getReachingLocalUses(MachineInstr *Def,
// If/when we find a new reaching def, we know that there's no more uses
// of 'Def'.
- if (getReachingLocalMIDef(&*MI, PhysReg) != Def)
+ if (getReachingLocalMIDef(&*MI, Reg) != Def)
return;
for (auto &MO : MI->operands()) {
- if (!isValidRegUseOf(MO, PhysReg, TRI))
+ if (!isValidRegUseOf(MO, Reg, TRI))
continue;
Uses.insert(&*MI);
@@ -354,15 +352,14 @@ void ReachingDefAnalysis::getReachingLocalUses(MachineInstr *Def,
}
}
-bool ReachingDefAnalysis::getLiveInUses(MachineBasicBlock *MBB,
- MCRegister PhysReg,
+bool ReachingDefAnalysis::getLiveInUses(MachineBasicBlock *MBB, MCRegister Reg,
InstSet &Uses) const {
for (MachineInstr &MI :
instructionsWithoutDebug(MBB->instr_begin(), MBB->instr_end())) {
for (auto &MO : MI.operands()) {
- if (!isValidRegUseOf(MO, PhysReg, TRI))
+ if (!isValidRegUseOf(MO, Reg, TRI))
continue;
- if (getReachingDef(&MI, PhysReg) >= 0)
+ if (getReachingDef(&MI, Reg) >= 0)
return false;
Uses.insert(&MI);
}
@@ -370,18 +367,18 @@ bool ReachingDefAnalysis::getLiveInUses(MachineBasicBlock *MBB,
auto Last = MBB->getLastNonDebugInstr();
if (Last == MBB->end())
return true;
- return isReachingDefLiveOut(&*Last, PhysReg);
+ return isReachingDefLiveOut(&*Last, Reg);
}
-void ReachingDefAnalysis::getGlobalUses(MachineInstr *MI, MCRegister PhysReg,
+void ReachingDefAnalysis::getGlobalUses(MachineInstr *MI, MCRegister Reg,
InstSet &Uses) const {
MachineBasicBlock *MBB = MI->getParent();
// Collect the uses that each def touches within the block.
- getReachingLocalUses(MI, PhysReg, Uses);
+ getReachingLocalUses(MI, Reg, Uses);
// Handle live-out values.
- if (auto *LiveOut = getLocalLiveOutMIDef(MI->getParent(), PhysReg)) {
+ if (auto *LiveOut = getLocalLiveOutMIDef(MI->getParent(), Reg)) {
if (LiveOut != MI)
return;
@@ -389,9 +386,9 @@ void ReachingDefAnalysis::getGlobalUses(MachineInstr *MI, MCRegister PhysReg,
SmallPtrSet<MachineBasicBlock*, 4>Visited;
while (!ToVisit.empty()) {
MachineBasicBlock *MBB = ToVisit.pop_back_val();
- if (Visited.count(MBB) || !MBB->isLiveIn(PhysReg))
+ if (Visited.count(MBB) || !MBB->isLiveIn(Reg))
continue;
- if (getLiveInUses(MBB, PhysReg, Uses))
+ if (getLiveInUses(MBB, Reg, Uses))
llvm::append_range(ToVisit, MBB->successors());
Visited.insert(MBB);
}
@@ -399,25 +396,25 @@ void ReachingDefAnalysis::getGlobalUses(MachineInstr *MI, MCRegister PhysReg,
}
void ReachingDefAnalysis::getGlobalReachingDefs(MachineInstr *MI,
- MCRegister PhysReg,
+ MCRegister Reg,
InstSet &Defs) const {
- if (auto *Def = getUniqueReachingMIDef(MI, PhysReg)) {
+ if (auto *Def = getUniqueReachingMIDef(MI, Reg)) {
Defs.insert(Def);
return;
}
for (auto *MBB : MI->getParent()->predecessors())
- getLiveOuts(MBB, PhysReg, Defs);
+ getLiveOuts(MBB, Reg, Defs);
}
-void ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB,
- MCRegister PhysReg, InstSet &Defs) const {
+void ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB, MCRegister Reg,
+ InstSet &Defs) const {
SmallPtrSet<MachineBasicBlock*, 2> VisitedBBs;
- getLiveOuts(MBB, PhysReg, Defs, VisitedBBs);
+ getLiveOuts(MBB, Reg, Defs, VisitedBBs);
}
-void ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB,
- MCRegister PhysReg, InstSet &Defs,
+void ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB, MCRegister Reg,
+ InstSet &Defs,
BlockSet &VisitedBBs) const {
if (VisitedBBs.count(MBB))
return;
@@ -425,28 +422,28 @@ void ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB,
VisitedBBs.insert(MBB);
LiveRegUnits LiveRegs(*TRI);
LiveRegs.addLiveOuts(*MBB);
- if (LiveRegs.available(PhysReg))
+ if (LiveRegs.available(Reg))
return;
- if (auto *Def = getLocalLiveOutMIDef(MBB, PhysReg))
+ if (auto *Def = getLocalLiveOutMIDef(MBB, Reg))
Defs.insert(Def);
else
for (auto *Pred : MBB->predecessors())
- getLiveOuts(Pred, PhysReg, Defs, VisitedBBs);
+ getLiveOuts(Pred, Reg, Defs, VisitedBBs);
}
MachineInstr *
ReachingDefAnalysis::getUniqueReachingMIDef(MachineInstr *MI,
- MCRegister PhysReg) const {
+ MCRegister Reg) const {
// If there's a local def before MI, return it.
- MachineInstr *LocalDef = getReachingLocalMIDef(MI, PhysReg);
+ MachineInstr *LocalDef = getReachingLocalMIDef(MI, Reg);
if (LocalDef && InstIds.lookup(LocalDef) < InstIds.lookup(MI))
return LocalDef;
SmallPtrSet<MachineInstr*, 2> Incoming;
MachineBasicBlock *Parent = MI->getParent();
for (auto *Pred : Parent->predecessors())
- getLiveOuts(Pred, PhysReg, Incoming);
+ getLiveOuts(Pred, Reg, Incoming);
// Check that we have a single incoming value and that it does not
// come from the same block as MI - since it would mean that the def
@@ -469,13 +466,13 @@ MachineInstr *ReachingDefAnalysis::getMIOperand(MachineInstr *MI,
}
bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI,
- MCRegister PhysReg) const {
+ MCRegister Reg) const {
MachineBasicBlock *MBB = MI->getParent();
LiveRegUnits LiveRegs(*TRI);
LiveRegs.addLiveOuts(*MBB);
// Yes if the register is live out of the basic block.
- if (!LiveRegs.available(PhysReg))
+ if (!LiveRegs.available(Reg))
return true;
// Walk backwards through the block to see if the register is live at some
@@ -483,62 +480,61 @@ bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI,
for (MachineInstr &Last :
instructionsWithoutDebug(MBB->instr_rbegin(), MBB->instr_rend())) {
LiveRegs.stepBackward(Last);
- if (!LiveRegs.available(PhysReg))
+ if (!LiveRegs.available(Reg))
return InstIds.lookup(&Last) > InstIds.lookup(MI);
}
return false;
}
bool ReachingDefAnalysis::isRegDefinedAfter(MachineInstr *MI,
- MCRegister PhysReg) const {
+ MCRegister Reg) const {
MachineBasicBlock *MBB = MI->getParent();
auto Last = MBB->getLastNonDebugInstr();
if (Last != MBB->end() &&
- getReachingDef(MI, PhysReg) != getReachingDef(&*Last, PhysReg))
+ getReachingDef(MI, Reg) != getReachingDef(&*Last, Reg))
return true;
- if (auto *Def = getLocalLiveOutMIDef(MBB, PhysReg))
- return Def == getReachingLocalMIDef(MI, PhysReg);
+ if (auto *Def = getLocalLiveOutMIDef(MBB, Reg))
+ return Def == getReachingLocalMIDef(MI, Reg);
return false;
}
bool ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI,
- MCRegister PhysReg) const {
+ MCRegister Reg) const {
MachineBasicBlock *MBB = MI->getParent();
LiveRegUnits LiveRegs(*TRI);
LiveRegs.addLiveOuts(*MBB);
- if (LiveRegs.available(PhysReg))
+ if (LiveRegs.available(Reg))
return false;
auto Last = MBB->getLastNonDebugInstr();
- int Def = getReachingDef(MI, PhysReg);
- if (Last != MBB->end() && getReachingDef(&*Last, PhysReg) != Def)
+ int Def = getReachingDef(MI, Reg);
+ if (Last != MBB->end() && getReachingDef(&*Last, Reg) != Def)
return false;
// Finally check that the last instruction doesn't redefine the register.
for (auto &MO : Last->operands())
- if (isValidRegDefOf(MO, PhysReg, TRI))
+ if (isValidRegDefOf(MO, Reg, TRI))
return false;
return true;
}
-MachineInstr *
-ReachingDefAnalysis::getLocalLiveOutMIDef(MachineBasicBlock *MBB,
- MCRegister PhysReg) const {
+MachineInstr *ReachingDefAnalysis::getLocalLiveOutMIDef(MachineBasicBlock *MBB,
+ MCRegister Reg) const {
LiveRegUnits LiveRegs(*TRI);
LiveRegs.addLiveOuts(*MBB);
- if (LiveRegs.available(PhysReg))
+ if (LiveRegs.available(Reg))
return nullptr;
auto Last = MBB->getLastNonDebugInstr();
if (Last == MBB->end())
return nullptr;
- int Def = getReachingDef(&*Last, PhysReg);
+ int Def = getReachingDef(&*Last, Reg);
for (auto &MO : Last->operands())
- if (isValidRegDefOf(MO, PhysReg, TRI))
+ if (isValidRegDefOf(MO, Reg, TRI))
return &*Last;
return Def < 0 ? nullptr : getInstFromId(MBB, Def);
@@ -650,7 +646,7 @@ ReachingDefAnalysis::isSafeToRemove(MachineInstr *MI, InstSet &Visited,
void ReachingDefAnalysis::collectKilledOperands(MachineInstr *MI,
InstSet &Dead) const {
Dead.insert(MI);
- auto IsDead = [this, &Dead](MachineInstr *Def, MCRegister PhysReg) {
+ auto IsDead = [this, &Dead](MachineInstr *Def, MCRegister Reg) {
if (mayHaveSideEffects(*Def))
return false;
@@ -666,7 +662,7 @@ void ReachingDefAnalysis::collectKilledOperands(MachineInstr *MI,
return false;
SmallPtrSet<MachineInstr*, 4> Uses;
- getGlobalUses(Def, PhysReg, Uses);
+ getGlobalUses(Def, Reg, Uses);
return llvm::set_is_subset(Uses, Dead);
};
@@ -680,18 +676,18 @@ void ReachingDefAnalysis::collectKilledOperands(MachineInstr *MI,
}
bool ReachingDefAnalysis::isSafeToDefRegAt(MachineInstr *MI,
- MCRegister PhysReg) const {
+ MCRegister Reg) const {
SmallPtrSet<MachineInstr*, 1> Ignore;
- return isSafeToDefRegAt(MI, PhysReg, Ignore);
+ return isSafeToDefRegAt(MI, Reg, Ignore);
}
-bool ReachingDefAnalysis::isSafeToDefRegAt(MachineInstr *MI, MCRegister PhysReg,
+bool ReachingDefAnalysis::isSafeToDefRegAt(MachineInstr *MI, MCRegister Reg,
InstSet &Ignore) const {
// Check for any uses of the register after MI.
- if (isRegUsedAfter(MI, PhysReg)) {
- if (auto *Def = getReachingLocalMIDef(MI, PhysReg)) {
+ if (isRegUsedAfter(MI, Reg)) {
+ if (auto *Def = getReachingLocalMIDef(MI, Reg)) {
SmallPtrSet<MachineInstr*, 2> Uses;
- getGlobalUses(Def, PhysReg, Uses);
+ getGlobalUses(Def, Reg, Uses);
if (!llvm::set_is_subset(Uses, Ignore))
return false;
} else
@@ -700,13 +696,13 @@ bool ReachingDefAnalysis::isSafeToDefRegAt(MachineInstr *MI, MCRegister PhysReg,
MachineBasicBlock *MBB = MI->getParent();
// Check for any defs after MI.
- if (isRegDefinedAfter(MI, PhysReg)) {
+ if (isRegDefinedAfter(MI, Reg)) {
auto I = MachineBasicBlock::iterator(MI);
for (auto E = MBB->end(); I != E; ++I) {
if (Ignore.count(&*I))
continue;
for (auto &MO : I->operands())
- if (isValidRegDefOf(MO, PhysReg, TRI))
+ if (isValidRegDefOf(MO, Reg, TRI))
return false;
}
}
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 4fa2bc7..b94992c 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -140,7 +140,7 @@ static cl::opt<bool> GreedyReverseLocalAssignment(
static cl::opt<unsigned> SplitThresholdForRegWithHint(
"split-threshold-for-reg-with-hint",
cl::desc("The threshold for splitting a virtual register with a hint, in "
- "percentate"),
+ "percentage"),
cl::init(75), cl::Hidden);
static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
@@ -376,6 +376,12 @@ unsigned DefaultPriorityAdvisor::getPriority(const LiveInterval &LI) const {
return Prio;
}
+unsigned DummyPriorityAdvisor::getPriority(const LiveInterval &LI) const { // The result depends only on LI's register.
+ // Prioritize by virtual register number, lowest first.
+ Register Reg = LI.reg();
+ return ~Reg.virtRegIndex(); // Bitwise NOT: a smaller index produces a larger returned value.
+}
+
const LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); }
const LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
@@ -2029,6 +2035,9 @@ unsigned RAGreedy::tryLastChanceRecoloring(const LiveInterval &VirtReg,
// available colors.
Matrix->assign(VirtReg, PhysReg);
+ // VirtReg may be deleted during tryRecoloringCandidates, save a copy.
+ Register ThisVirtReg = VirtReg.reg();
+
// Save the current recoloring state.
// If we cannot recolor all the interferences, we will have to start again
// at this point for the next physical register.
@@ -2040,8 +2049,16 @@ unsigned RAGreedy::tryLastChanceRecoloring(const LiveInterval &VirtReg,
NewVRegs.push_back(NewVReg);
// Do not mess up with the global assignment process.
// I.e., VirtReg must be unassigned.
- Matrix->unassign(VirtReg);
- return PhysReg;
+ if (VRM->hasPhys(ThisVirtReg)) {
+ Matrix->unassign(VirtReg);
+ return PhysReg;
+ }
+
+ // It is possible VirtReg will be deleted during tryRecoloringCandidates.
+ LLVM_DEBUG(dbgs() << "tryRecoloringCandidates deleted a fixed register "
+ << printReg(ThisVirtReg) << '\n');
+ FixedRegisters.erase(ThisVirtReg);
+ return 0;
}
LLVM_DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to "
diff --git a/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp b/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp
index 0650aaf..4525b8f 100644
--- a/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp
+++ b/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp
@@ -30,7 +30,10 @@ static cl::opt<RegAllocPriorityAdvisorAnalysis::AdvisorMode> Mode(
clEnumValN(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Release,
"release", "precompiled"),
clEnumValN(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Development,
- "development", "for training")));
+ "development", "for training"),
+ clEnumValN(
+ RegAllocPriorityAdvisorAnalysis::AdvisorMode::Dummy, "dummy",
+ "prioritize low virtual register numbers for test and debug")));
char RegAllocPriorityAdvisorAnalysis::ID = 0;
INITIALIZE_PASS(RegAllocPriorityAdvisorAnalysis, "regalloc-priority",
@@ -67,6 +70,31 @@ private:
}
const bool NotAsRequested;
};
+
+class DummyPriorityAdvisorAnalysis final // Analysis whose getAdvisor() creates DummyPriorityAdvisor objects.
+ : public RegAllocPriorityAdvisorAnalysis {
+public:
+ DummyPriorityAdvisorAnalysis()
+ : RegAllocPriorityAdvisorAnalysis(AdvisorMode::Dummy) {} // Tags the base class with AdvisorMode::Dummy.
+
+ // support for isa<> and dyn_cast.
+ static bool classof(const RegAllocPriorityAdvisorAnalysis *R) {
+ return R->getAdvisorMode() == AdvisorMode::Dummy; // Matches the mode set by the constructor above.
+ }
+
+private:
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<SlotIndexesWrapperPass>(); // getAdvisor() reads SlotIndexes from this pass.
+ RegAllocPriorityAdvisorAnalysis::getAnalysisUsage(AU); // Then add whatever the base class declares.
+ }
+
+ std::unique_ptr<RegAllocPriorityAdvisor>
+ getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override { // Allocates a new advisor on every call.
+ return std::make_unique<DummyPriorityAdvisor>(
+ MF, RA, &getAnalysis<SlotIndexesWrapperPass>().getSI());
+ }
+};
+
} // namespace
template <> Pass *llvm::callDefaultCtor<RegAllocPriorityAdvisorAnalysis>() {
@@ -75,6 +103,9 @@ template <> Pass *llvm::callDefaultCtor<RegAllocPriorityAdvisorAnalysis>() {
case RegAllocPriorityAdvisorAnalysis::AdvisorMode::Default:
Ret = new DefaultPriorityAdvisorAnalysis(/*NotAsRequested*/ false);
break;
+ case RegAllocPriorityAdvisorAnalysis::AdvisorMode::Dummy:
+ Ret = new DummyPriorityAdvisorAnalysis();
+ break;
case RegAllocPriorityAdvisorAnalysis::AdvisorMode::Development:
#if defined(LLVM_HAVE_TFLITE)
Ret = createDevelopmentModePriorityAdvisor();
@@ -97,6 +128,8 @@ StringRef RegAllocPriorityAdvisorAnalysis::getPassName() const {
return "Release mode Regalloc Priority Advisor";
case AdvisorMode::Development:
return "Development mode Regalloc Priority Advisor";
+ case AdvisorMode::Dummy:
+ return "Dummy Regalloc Priority Advisor";
}
llvm_unreachable("Unknown advisor kind");
}
diff --git a/llvm/lib/CodeGen/RegAllocPriorityAdvisor.h b/llvm/lib/CodeGen/RegAllocPriorityAdvisor.h
index 1e9fa96..32e4598 100644
--- a/llvm/lib/CodeGen/RegAllocPriorityAdvisor.h
+++ b/llvm/lib/CodeGen/RegAllocPriorityAdvisor.h
@@ -56,9 +56,21 @@ private:
unsigned getPriority(const LiveInterval &LI) const override;
};
+/// Trivial priority advisor which just enqueues in virtual register number
+/// order, for debug purposes only.
+class DummyPriorityAdvisor : public RegAllocPriorityAdvisor {
+public:
+ DummyPriorityAdvisor(const MachineFunction &MF, const RAGreedy &RA,
+ SlotIndexes *const Indexes)
+ : RegAllocPriorityAdvisor(MF, RA, Indexes) {} // Forwards all three arguments to the base class.
+
+private:
+ unsigned getPriority(const LiveInterval &LI) const override; // Declaration only; the body is defined out of line.
+};
+
class RegAllocPriorityAdvisorAnalysis : public ImmutablePass {
public:
- enum class AdvisorMode : int { Default, Release, Development };
+ enum class AdvisorMode : int { Default, Release, Development, Dummy };
RegAllocPriorityAdvisorAnalysis(AdvisorMode Mode)
: ImmutablePass(ID), Mode(Mode){};
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 20ad644..8313927 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -113,7 +113,7 @@ static cl::opt<unsigned> LargeIntervalSizeThreshold(
static cl::opt<unsigned> LargeIntervalFreqThreshold(
"large-interval-freq-threshold", cl::Hidden,
- cl::desc("For a large interval, if it is coalesed with other live "
+ cl::desc("For a large interval, if it is coalesced with other live "
"intervals many times more than the threshold, stop its "
"coalescing to control the compile time. "),
cl::init(256));
@@ -1325,11 +1325,6 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
const MCInstrDesc &MCID = DefMI->getDesc();
if (MCID.getNumDefs() != 1)
return false;
- // Only support subregister destinations when the def is read-undef.
- MachineOperand &DstOperand = CopyMI->getOperand(0);
- Register CopyDstReg = DstOperand.getReg();
- if (DstOperand.getSubReg() && !DstOperand.isUndef())
- return false;
// If both SrcIdx and DstIdx are set, correct rematerialization would widen
// the register substantially (beyond both source and dest size). This is bad
@@ -1339,6 +1334,32 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
if (SrcIdx && DstIdx)
return false;
+ // Only support subregister destinations when the def is read-undef.
+ MachineOperand &DstOperand = CopyMI->getOperand(0);
+ Register CopyDstReg = DstOperand.getReg();
+ if (DstOperand.getSubReg() && !DstOperand.isUndef())
+ return false;
+
+ // In the physical register case, checking that the def is read-undef is not
+ // enough. We're widening the def and need to avoid clobbering other live
+ // values in the unused register pieces.
+ //
+ // TODO: Targets may support rewriting the rematerialized instruction to only
+ // touch relevant lanes, in which case we don't need any liveness check.
+ if (CopyDstReg.isPhysical() && CP.isPartial()) {
+ for (MCRegUnit Unit : TRI->regunits(DstReg)) {
+ // Ignore the register units we are writing anyway.
+ if (is_contained(TRI->regunits(CopyDstReg), Unit))
+ continue;
+
+ // Check if the other lanes we are defining are live at the
+ // rematerialization point.
+ LiveRange &LR = LIS->getRegUnit(Unit);
+ if (LR.liveAt(CopyIdx))
+ return false;
+ }
+ }
+
const unsigned DefSubIdx = DefMI->getOperand(0).getSubReg();
const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF);
if (!DefMI->isImplicitDef()) {
@@ -1375,27 +1396,6 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
NewMI.setDebugLoc(DL);
// In a situation like the following:
- //
- // undef %2.subreg:reg = INST %1:reg ; DefMI (rematerializable),
- // ; DefSubIdx = subreg
- // %3:reg = COPY %2 ; SrcIdx = DstIdx = 0
- // .... = SOMEINSTR %3:reg
- //
- // there are no subranges for %3 so after rematerialization we need
- // to explicitly create them. Undefined subranges are removed later on.
- if (DstReg.isVirtual() && DefSubIdx && !CP.getSrcIdx() && !CP.getDstIdx() &&
- MRI->shouldTrackSubRegLiveness(DstReg)) {
- LiveInterval &DstInt = LIS->getInterval(DstReg);
- if (!DstInt.hasSubRanges()) {
- LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(DstReg);
- LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(DefSubIdx);
- LaneBitmask UnusedLanes = FullMask & ~UsedLanes;
- DstInt.createSubRangeFrom(LIS->getVNInfoAllocator(), UsedLanes, DstInt);
- DstInt.createSubRangeFrom(LIS->getVNInfoAllocator(), UnusedLanes, DstInt);
- }
- }
-
- // In a situation like the following:
// %0:subreg = instr ; DefMI, subreg = DstIdx
// %1 = copy %0:subreg ; CopyMI, SrcIdx = 0
// instead of widening %1 to the register class of %0 simply do:
@@ -1523,6 +1523,27 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
// sure that "undef" is not set.
if (NewIdx == 0)
NewMI.getOperand(0).setIsUndef(false);
+
+ // In a situation like the following:
+ //
+ // undef %2.subreg:reg = INST %1:reg ; DefMI (rematerializable),
+ // ; Defines only some of lanes,
+ // ; so DefSubIdx = NewIdx = subreg
+ // %3:reg = COPY %2 ; Copy full reg
+ // .... = SOMEINSTR %3:reg ; Use full reg
+ //
+ // there are no subranges for %3 so after rematerialization we need
+ // to explicitly create them. Undefined subranges are removed later on.
+ if (NewIdx && !DstInt.hasSubRanges() &&
+ MRI->shouldTrackSubRegLiveness(DstReg)) {
+ LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(DstReg);
+ LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(NewIdx);
+ LaneBitmask UnusedLanes = FullMask & ~UsedLanes;
+ VNInfo::Allocator &Alloc = LIS->getVNInfoAllocator();
+ DstInt.createSubRangeFrom(Alloc, UsedLanes, DstInt);
+ DstInt.createSubRangeFrom(Alloc, UnusedLanes, DstInt);
+ }
+
// Add dead subregister definitions if we are defining the whole register
// but only part of it is live.
// This could happen if the rematerialization instruction is rematerializing
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6cbfef2..da3c834 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -141,7 +141,7 @@ static cl::opt<bool> EnableReduceLoadOpStoreWidth(
static cl::opt<bool> ReduceLoadOpStoreWidthForceNarrowingProfitable(
"combiner-reduce-load-op-store-width-force-narrowing-profitable",
cl::Hidden, cl::init(false),
- cl::desc("DAG combiner force override the narrowing profitable check when"
+ cl::desc("DAG combiner force override the narrowing profitable check when "
"reducing the width of load/op/store sequences"));
static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
@@ -3949,6 +3949,23 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
return Result;
+ // Similar to the previous rule, but this time targeting an expanded abs.
+ // (sub 0, (max X, (sub 0, X))) --> (min X, (sub 0, X))
+ // as well as
+ // (sub 0, (min X, (sub 0, X))) --> (max X, (sub 0, X))
+ // Note that these two are applicable to both signed and unsigned min/max.
+ SDValue X;
+ SDValue S0;
+ auto NegPat = m_AllOf(m_Neg(m_Deferred(X)), m_Value(S0));
+ if (sd_match(N1, m_OneUse(m_AnyOf(m_SMax(m_Value(X), NegPat),
+ m_UMax(m_Value(X), NegPat),
+ m_SMin(m_Value(X), NegPat),
+ m_UMin(m_Value(X), NegPat))))) {
+ unsigned NewOpc = ISD::getInverseMinMaxOpcode(N1->getOpcode());
+ if (hasOperation(NewOpc, VT))
+ return DAG.getNode(NewOpc, DL, VT, X, S0);
+ }
+
// Fold neg(splat(neg(x)) -> splat(x)
if (VT.isVector()) {
SDValue N1S = DAG.getSplatValue(N1, true);
@@ -20438,10 +20455,8 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
Value.hasOneUse()) {
LoadSDNode *LD = cast<LoadSDNode>(Value);
EVT VT = LD->getMemoryVT();
- if (!VT.isFloatingPoint() ||
- VT != ST->getMemoryVT() ||
- LD->isNonTemporal() ||
- ST->isNonTemporal() ||
+ if (!VT.isSimple() || !VT.isFloatingPoint() || VT != ST->getMemoryVT() ||
+ LD->isNonTemporal() || ST->isNonTemporal() ||
LD->getPointerInfo().getAddrSpace() != 0 ||
ST->getPointerInfo().getAddrSpace() != 0)
return SDValue();
@@ -23088,8 +23103,11 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
return DAG.getAnyExtOrTrunc(BCSrc, DL, ScalarVT);
+ // TODO: Add support for SCALAR_TO_VECTOR implicit truncation.
if (LegalTypes && BCSrc.getValueType().isInteger() &&
- BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ BCSrc.getScalarValueSizeInBits() ==
+ BCSrc.getOperand(0).getScalarValueSizeInBits()) {
// ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
// trunc i64 X to i32
SDValue X = BCSrc.getOperand(0);
@@ -24288,8 +24306,8 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
// Keep track of what we encounter.
- bool AnyInteger = false;
- bool AnyFP = false;
+ EVT AnyFPVT;
+
for (const SDValue &Op : N->ops()) {
if (ISD::BITCAST == Op.getOpcode() &&
!Op.getOperand(0).getValueType().isVector())
@@ -24303,27 +24321,23 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
// If it's neither, bail out, it could be something weird like x86mmx.
EVT LastOpVT = Ops.back().getValueType();
if (LastOpVT.isFloatingPoint())
- AnyFP = true;
- else if (LastOpVT.isInteger())
- AnyInteger = true;
- else
+ AnyFPVT = LastOpVT;
+ else if (!LastOpVT.isInteger())
return SDValue();
}
// If any of the operands is a floating point scalar bitcast to a vector,
// use floating point types throughout, and bitcast everything.
// Replace UNDEFs by another scalar UNDEF node, of the final desired type.
- if (AnyFP) {
- SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
- if (AnyInteger) {
- for (SDValue &Op : Ops) {
- if (Op.getValueType() == SVT)
- continue;
- if (Op.isUndef())
- Op = DAG.getNode(ISD::UNDEF, DL, SVT);
- else
- Op = DAG.getBitcast(SVT, Op);
- }
+ if (AnyFPVT != EVT()) {
+ SVT = AnyFPVT;
+ for (SDValue &Op : Ops) {
+ if (Op.getValueType() == SVT)
+ continue;
+ if (Op.isUndef())
+ Op = DAG.getNode(ISD::UNDEF, DL, SVT);
+ else
+ Op = DAG.getBitcast(SVT, Op);
}
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index db21e70..89a00c5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -402,6 +402,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FMAXNUM_IEEE:
case ISD::FMINIMUM:
case ISD::FMAXIMUM:
+ case ISD::FMINIMUMNUM:
+ case ISD::FMAXIMUMNUM:
case ISD::FCOPYSIGN:
case ISD::FSQRT:
case ISD::FSIN:
@@ -1081,6 +1083,10 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
case ISD::FMAXIMUM:
Results.push_back(TLI.expandFMINIMUM_FMAXIMUM(Node, DAG));
return;
+ case ISD::FMINIMUMNUM:
+ case ISD::FMAXIMUMNUM:
+ Results.push_back(TLI.expandFMINIMUMNUM_FMAXIMUMNUM(Node, DAG));
+ return;
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
@@ -1738,7 +1744,8 @@ void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node,
bool IsStrict = Node->isStrictFPOpcode();
unsigned OpNo = IsStrict ? 1 : 0;
SDValue Src = Node->getOperand(OpNo);
- EVT VT = Src.getValueType();
+ EVT SrcVT = Src.getValueType();
+ EVT DstVT = Node->getValueType(0);
SDLoc DL(Node);
// Attempt to expand using TargetLowering.
@@ -1752,11 +1759,11 @@ void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node,
}
// Make sure that the SINT_TO_FP and SRL instructions are available.
- if (((!IsStrict && TLI.getOperationAction(ISD::SINT_TO_FP, VT) ==
+ if (((!IsStrict && TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) ==
TargetLowering::Expand) ||
- (IsStrict && TLI.getOperationAction(ISD::STRICT_SINT_TO_FP, VT) ==
+ (IsStrict && TLI.getOperationAction(ISD::STRICT_SINT_TO_FP, SrcVT) ==
TargetLowering::Expand)) ||
- TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) {
+ TLI.getOperationAction(ISD::SRL, SrcVT) == TargetLowering::Expand) {
if (IsStrict) {
UnrollStrictFPOp(Node, Results);
return;
@@ -1766,37 +1773,59 @@ void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node,
return;
}
- unsigned BW = VT.getScalarSizeInBits();
+ unsigned BW = SrcVT.getScalarSizeInBits();
assert((BW == 64 || BW == 32) &&
"Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
- SDValue HalfWord = DAG.getConstant(BW / 2, DL, VT);
+ // If FMUL (or STRICT_FMUL for strict nodes) is not legal or custom for the
+ // result type (e.g. f16), convert to f32/f64 and round to the result type.
+ if ((!IsStrict && !TLI.isOperationLegalOrCustom(ISD::FMUL, DstVT)) ||
+ (IsStrict && !TLI.isOperationLegalOrCustom(ISD::STRICT_FMUL, DstVT))) {
+ EVT FPVT = BW == 32 ? MVT::f32 : MVT::f64;
+ SDValue UIToFP;
+ SDValue Result;
+ SDValue TargetZero = DAG.getIntPtrConstant(0, DL, /*isTarget=*/true);
+ EVT FloatVecVT = SrcVT.changeVectorElementType(FPVT);
+ if (IsStrict) {
+ UIToFP = DAG.getNode(ISD::STRICT_UINT_TO_FP, DL, {FloatVecVT, MVT::Other},
+ {Node->getOperand(0), Src});
+ Result = DAG.getNode(ISD::STRICT_FP_ROUND, DL, {DstVT, MVT::Other},
+ {Node->getOperand(0), UIToFP, TargetZero});
+ Results.push_back(Result);
+ Results.push_back(Result.getValue(1));
+ } else {
+ UIToFP = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVecVT, Src);
+ Result = DAG.getNode(ISD::FP_ROUND, DL, DstVT, UIToFP, TargetZero);
+ Results.push_back(Result);
+ }
+
+ return;
+ }
+
+ SDValue HalfWord = DAG.getConstant(BW / 2, DL, SrcVT);
// Constants to clear the upper part of the word.
// Notice that we can also use SHL+SHR, but using a constant is slightly
// faster on x86.
uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF;
- SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT);
+ SDValue HalfWordMask = DAG.getConstant(HWMask, DL, SrcVT);
// Two to the power of half-word-size.
- SDValue TWOHW =
- DAG.getConstantFP(1ULL << (BW / 2), DL, Node->getValueType(0));
+ SDValue TWOHW = DAG.getConstantFP(1ULL << (BW / 2), DL, DstVT);
// Clear upper part of LO, lower HI
- SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Src, HalfWord);
- SDValue LO = DAG.getNode(ISD::AND, DL, VT, Src, HalfWordMask);
+ SDValue HI = DAG.getNode(ISD::SRL, DL, SrcVT, Src, HalfWord);
+ SDValue LO = DAG.getNode(ISD::AND, DL, SrcVT, Src, HalfWordMask);
if (IsStrict) {
// Convert hi and lo to floats
// Convert the hi part back to the upper values
// TODO: Can any fast-math-flags be set on these nodes?
- SDValue fHI = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL,
- {Node->getValueType(0), MVT::Other},
+ SDValue fHI = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, {DstVT, MVT::Other},
{Node->getOperand(0), HI});
- fHI = DAG.getNode(ISD::STRICT_FMUL, DL, {Node->getValueType(0), MVT::Other},
+ fHI = DAG.getNode(ISD::STRICT_FMUL, DL, {DstVT, MVT::Other},
{fHI.getValue(1), fHI, TWOHW});
- SDValue fLO = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL,
- {Node->getValueType(0), MVT::Other},
+ SDValue fLO = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, {DstVT, MVT::Other},
{Node->getOperand(0), LO});
SDValue TF = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, fHI.getValue(1),
@@ -1804,8 +1833,7 @@ void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node,
// Add the two halves
SDValue Result =
- DAG.getNode(ISD::STRICT_FADD, DL, {Node->getValueType(0), MVT::Other},
- {TF, fHI, fLO});
+ DAG.getNode(ISD::STRICT_FADD, DL, {DstVT, MVT::Other}, {TF, fHI, fLO});
Results.push_back(Result);
Results.push_back(Result.getValue(1));
@@ -1815,13 +1843,12 @@ void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node,
// Convert hi and lo to floats
// Convert the hi part back to the upper values
// TODO: Can any fast-math-flags be set on these nodes?
- SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Node->getValueType(0), HI);
- fHI = DAG.getNode(ISD::FMUL, DL, Node->getValueType(0), fHI, TWOHW);
- SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Node->getValueType(0), LO);
+ SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, DstVT, HI);
+ fHI = DAG.getNode(ISD::FMUL, DL, DstVT, fHI, TWOHW);
+ SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, DstVT, LO);
// Add the two halves
- Results.push_back(
- DAG.getNode(ISD::FADD, DL, Node->getValueType(0), fHI, fLO));
+ Results.push_back(DAG.getNode(ISD::FADD, DL, DstVT, fHI, fLO));
}
SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) {
@@ -2246,11 +2273,13 @@ SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) {
DAG.getVectorIdxConstant(i, dl));
SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
DAG.getVectorIdxConstant(i, dl));
+ // FIXME: We should use i1 setcc + boolext here, but it causes regressions.
Ops[i] = DAG.getNode(ISD::SETCC, dl,
TLI.getSetCCResultType(DAG.getDataLayout(),
*DAG.getContext(), TmpEltVT),
LHSElem, RHSElem, CC);
- Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], DAG.getAllOnesConstant(dl, EltVT),
+ Ops[i] = DAG.getSelect(dl, EltVT, Ops[i],
+ DAG.getBoolConstant(true, dl, EltVT, VT),
DAG.getConstant(0, dl, EltVT));
}
return DAG.getBuildVector(VT, dl, Ops);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 107454a..780eba1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -149,6 +149,8 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FMAXNUM_IEEE:
case ISD::FMINIMUM:
case ISD::FMAXIMUM:
+ case ISD::FMINIMUMNUM:
+ case ISD::FMAXIMUMNUM:
case ISD::FLDEXP:
case ISD::ABDS:
case ISD::ABDU:
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 9e5867c..51ee3cc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -125,9 +125,9 @@ static cl::opt<int> MaxReorderWindow(
cl::desc("Number of instructions to allow ahead of the critical path "
"in sched=list-ilp"));
-static cl::opt<unsigned> AvgIPC(
- "sched-avg-ipc", cl::Hidden, cl::init(1),
- cl::desc("Average inst/cycle whan no target itinerary exists."));
+static cl::opt<unsigned>
+ AvgIPC("sched-avg-ipc", cl::Hidden, cl::init(1),
+ cl::desc("Average inst/cycle when no target itinerary exists."));
namespace {
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 26fc75c..dff7243 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -43,9 +43,9 @@ STATISTIC(LoadsClustered, "Number of loads clustered together");
// without a target itinerary. The choice of number here has more to do with
// balancing scheduler heuristics than with the actual machine latency.
static cl::opt<int> HighLatencyCycles(
- "sched-high-latency-cycles", cl::Hidden, cl::init(10),
- cl::desc("Roughly estimate the number of cycles that 'long latency'"
- "instructions take for targets with no itinerary"));
+ "sched-high-latency-cycles", cl::Hidden, cl::init(10),
+ cl::desc("Roughly estimate the number of cycles that 'long latency' "
+ "instructions take for targets with no itinerary"));
ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
: ScheduleDAG(mf), InstrItins(mf.getSubtarget().getInstrItineraryData()) {}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 10e8ba9..0dfd030 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -430,6 +430,21 @@ bool ISD::matchBinaryPredicate(
return true;
}
+ISD::NodeType ISD::getInverseMinMaxOpcode(unsigned MinMaxOpc) { // min <-> max
+ switch (MinMaxOpc) {
+ default: // Anything other than the four opcodes handled below.
+ llvm_unreachable("unrecognized opcode");
+ case ISD::UMIN: // UMIN maps to UMAX.
+ return ISD::UMAX;
+ case ISD::UMAX: // UMAX maps to UMIN.
+ return ISD::UMIN;
+ case ISD::SMIN: // SMIN maps to SMAX.
+ return ISD::SMAX;
+ case ISD::SMAX: // SMAX maps to SMIN.
+ return ISD::SMIN;
+ }
+}
+
ISD::NodeType ISD::getVecReduceBaseOpcode(unsigned VecReduceOpcode) {
switch (VecReduceOpcode) {
default:
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index e87d809..9f57884 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8435,7 +8435,6 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
return false;
SDLoc dl(SDValue(Node, 0));
- EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());
// Implementation of unsigned i64 to f64 following the algorithm in
// __floatundidf in compiler_rt. This implementation performs rounding
@@ -8448,7 +8447,7 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
- SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);
+ SDValue HiShift = DAG.getShiftAmountConstant(32, SrcVT, dl);
SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
diff --git a/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp b/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
index 687acd9..8437422 100644
--- a/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
+++ b/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
@@ -106,8 +106,6 @@ bool StackMapLiveness::runOnMachineFunction(MachineFunction &MF) {
if (!EnablePatchPointLiveness)
return false;
- LLVM_DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: "
- << MF.getName() << " **********\n");
TRI = MF.getSubtarget().getRegisterInfo();
++NumStackMapFuncVisited;
@@ -121,6 +119,8 @@ bool StackMapLiveness::runOnMachineFunction(MachineFunction &MF) {
/// Performs the actual liveness calculation for the function.
bool StackMapLiveness::calculateLiveness(MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: "
+ << MF.getName() << " **********\n");
bool HasChanged = false;
// For all basic blocks in the function.
for (auto &MBB : MF) {
diff --git a/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp b/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp
index 74a94d6..decffdc 100644
--- a/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp
+++ b/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp
@@ -259,7 +259,7 @@ void SwiftErrorValueTracking::propagateVRegs() {
for (const auto &Use : VRegUpwardsUse) {
const MachineBasicBlock *UseBB = Use.first.first;
Register VReg = Use.second;
- if (!MRI.def_begin(VReg).atEnd())
+ if (!MRI.def_empty(VReg))
continue;
#ifdef EXPENSIVE_CHECKS
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index d407e9f..5c05589 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -113,8 +113,6 @@ static cl::opt<bool> EnableImplicitNullChecks(
static cl::opt<bool> DisableMergeICmps("disable-mergeicmps",
cl::desc("Disable MergeICmps Pass"),
cl::init(false), cl::Hidden);
-static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden,
- cl::desc("Print LLVM IR produced by the loop-reduce pass"));
static cl::opt<bool>
PrintISelInput("print-isel-input", cl::Hidden,
cl::desc("Print LLVM IR input to isel pass"));
@@ -503,7 +501,6 @@ CGPassBuilderOption llvm::getCGPassBuilderOption() {
SET_BOOLEAN_OPTION(DisableCGP)
SET_BOOLEAN_OPTION(DisablePartialLibcallInlining)
SET_BOOLEAN_OPTION(DisableSelectOptimize)
- SET_BOOLEAN_OPTION(PrintLSR)
SET_BOOLEAN_OPTION(PrintISelInput)
SET_BOOLEAN_OPTION(DebugifyAndStripAll)
SET_BOOLEAN_OPTION(DebugifyCheckAndStripAll)
@@ -836,9 +833,6 @@ void TargetPassConfig::addIRPasses() {
addPass(createLoopStrengthReducePass());
if (EnableLoopTermFold)
addPass(createLoopTermFoldPass());
- if (PrintLSR)
- addPass(createPrintFunctionPass(dbgs(),
- "\n\n*** Code after LSR ***\n"));
}
// The MergeICmpsPass tries to create memcmp calls by grouping sequences of