aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
diff options
context:
space:
mode:
authorNAKAMURA Takumi <geek4civic@gmail.com>2025-01-09 18:15:55 +0900
committerNAKAMURA Takumi <geek4civic@gmail.com>2025-01-09 18:15:55 +0900
commitbdcf47e4bcb92889665825654bb80a8bbe30379e (patch)
tree4de1d6b4ddc69f4f32daabb11ad5c71ab0cf895e /llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
parente7fd5cd25334048980ea207a9eff72698724721a (diff)
parentfea7da1b00cc97d742faede2df96c7d327950f49 (diff)
downloadllvm-users/chapuni/cov/single/base.zip
llvm-users/chapuni/cov/single/base.tar.gz
llvm-users/chapuni/cov/single/base.tar.bz2
Merge branch 'users/chapuni/cov/single/nextcount' into users/chapuni/cov/single/baseusers/chapuni/cov/single/base
Diffstat (limited to 'llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp')
-rw-r--r--llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp290
1 files changed, 276 insertions, 14 deletions
diff --git a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
index f3f7ea9..aec8df9 100644
--- a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
+++ b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
@@ -108,6 +108,13 @@ static bool isNeg(Value *V);
static Value *getNegOperand(Value *V);
namespace {
+template <typename T, typename IterT>
+std::optional<T> findCommonBetweenCollections(IterT A, IterT B) {
+ auto Common = llvm::find_if(A, [B](T I) { return llvm::is_contained(B, I); });
+ if (Common != A.end())
+ return std::make_optional(*Common);
+ return std::nullopt;
+}
class ComplexDeinterleavingLegacyPass : public FunctionPass {
public:
@@ -144,6 +151,7 @@ private:
friend class ComplexDeinterleavingGraph;
using NodePtr = std::shared_ptr<ComplexDeinterleavingCompositeNode>;
using RawNodePtr = ComplexDeinterleavingCompositeNode *;
+ bool OperandsValid = true;
public:
ComplexDeinterleavingOperation Operation;
@@ -160,7 +168,11 @@ public:
SmallVector<RawNodePtr> Operands;
Value *ReplacementNode = nullptr;
- void addOperand(NodePtr Node) { Operands.push_back(Node.get()); }
+ void addOperand(NodePtr Node) {
+ if (!Node || !Node.get())
+ OperandsValid = false;
+ Operands.push_back(Node.get());
+ }
void dump() { dump(dbgs()); }
void dump(raw_ostream &OS) {
@@ -194,6 +206,8 @@ public:
PrintNodeRef(Op);
}
}
+
+ bool areOperandsValid() { return OperandsValid; }
};
class ComplexDeinterleavingGraph {
@@ -293,7 +307,7 @@ private:
NodePtr submitCompositeNode(NodePtr Node) {
CompositeNodes.push_back(Node);
- if (Node->Real && Node->Imag)
+ if (Node->Real)
CachedResult[{Node->Real, Node->Imag}] = Node;
return Node;
}
@@ -327,6 +341,8 @@ private:
/// i: ai - br
NodePtr identifyAdd(Instruction *Real, Instruction *Imag);
NodePtr identifySymmetricOperation(Instruction *Real, Instruction *Imag);
+ NodePtr identifyPartialReduction(Value *R, Value *I);
+ NodePtr identifyDotProduct(Value *Inst);
NodePtr identifyNode(Value *R, Value *I);
@@ -396,6 +412,7 @@ private:
/// * Deinterleave the final value outside of the loop and repurpose original
/// reduction users
void processReductionOperation(Value *OperationReplacement, RawNodePtr Node);
+ void processReductionSingle(Value *OperationReplacement, RawNodePtr Node);
public:
void dump() { dump(dbgs()); }
@@ -891,17 +908,163 @@ ComplexDeinterleavingGraph::identifySymmetricOperation(Instruction *Real,
}
ComplexDeinterleavingGraph::NodePtr
-ComplexDeinterleavingGraph::identifyNode(Value *R, Value *I) {
- LLVM_DEBUG(dbgs() << "identifyNode on " << *R << " / " << *I << "\n");
- assert(R->getType() == I->getType() &&
- "Real and imaginary parts should not have different types");
+ComplexDeinterleavingGraph::identifyDotProduct(Value *V) {
+
+ if (!TL->isComplexDeinterleavingOperationSupported(
+ ComplexDeinterleavingOperation::CDot, V->getType())) {
+ LLVM_DEBUG(dbgs() << "Target doesn't support complex deinterleaving "
+ "operation CDot with the type "
+ << *V->getType() << "\n");
+ return nullptr;
+ }
+
+ auto *Inst = cast<Instruction>(V);
+ auto *RealUser = cast<Instruction>(*Inst->user_begin());
+
+ NodePtr CN =
+ prepareCompositeNode(ComplexDeinterleavingOperation::CDot, Inst, nullptr);
+
+ NodePtr ANode;
+
+ const Intrinsic::ID PartialReduceInt =
+ Intrinsic::experimental_vector_partial_reduce_add;
+
+ Value *AReal = nullptr;
+ Value *AImag = nullptr;
+ Value *BReal = nullptr;
+ Value *BImag = nullptr;
+ Value *Phi = nullptr;
+
+ auto UnwrapCast = [](Value *V) -> Value * {
+ if (auto *CI = dyn_cast<CastInst>(V))
+ return CI->getOperand(0);
+ return V;
+ };
+
+ auto PatternRot0 = m_Intrinsic<PartialReduceInt>(
+ m_Intrinsic<PartialReduceInt>(m_Value(Phi),
+ m_Mul(m_Value(BReal), m_Value(AReal))),
+ m_Neg(m_Mul(m_Value(BImag), m_Value(AImag))));
+
+ auto PatternRot270 = m_Intrinsic<PartialReduceInt>(
+ m_Intrinsic<PartialReduceInt>(
+ m_Value(Phi), m_Neg(m_Mul(m_Value(BReal), m_Value(AImag)))),
+ m_Mul(m_Value(BImag), m_Value(AReal)));
+
+ if (match(Inst, PatternRot0)) {
+ CN->Rotation = ComplexDeinterleavingRotation::Rotation_0;
+ } else if (match(Inst, PatternRot270)) {
+ CN->Rotation = ComplexDeinterleavingRotation::Rotation_270;
+ } else {
+ Value *A0, *A1;
+ // The rotations 90 and 180 share the same operation pattern, so inspect the
+ // order of the operands, identifying where the real and imaginary
+ // components of A go, to discern between the aforementioned rotations.
+ auto PatternRot90Rot180 = m_Intrinsic<PartialReduceInt>(
+ m_Intrinsic<PartialReduceInt>(m_Value(Phi),
+ m_Mul(m_Value(BReal), m_Value(A0))),
+ m_Mul(m_Value(BImag), m_Value(A1)));
+
+ if (!match(Inst, PatternRot90Rot180))
+ return nullptr;
+
+ A0 = UnwrapCast(A0);
+ A1 = UnwrapCast(A1);
+
+ // Test if A0 is real/A1 is imag
+ ANode = identifyNode(A0, A1);
+ if (!ANode) {
+ // Test if A0 is imag/A1 is real
+ ANode = identifyNode(A1, A0);
+ // Unable to identify operand components, thus unable to identify rotation
+ if (!ANode)
+ return nullptr;
+ CN->Rotation = ComplexDeinterleavingRotation::Rotation_90;
+ AReal = A1;
+ AImag = A0;
+ } else {
+ AReal = A0;
+ AImag = A1;
+ CN->Rotation = ComplexDeinterleavingRotation::Rotation_180;
+ }
+ }
+
+ AReal = UnwrapCast(AReal);
+ AImag = UnwrapCast(AImag);
+ BReal = UnwrapCast(BReal);
+ BImag = UnwrapCast(BImag);
+
+ VectorType *VTy = cast<VectorType>(V->getType());
+ Type *ExpectedOperandTy = VectorType::getSubdividedVectorType(VTy, 2);
+ if (AReal->getType() != ExpectedOperandTy)
+ return nullptr;
+ if (AImag->getType() != ExpectedOperandTy)
+ return nullptr;
+ if (BReal->getType() != ExpectedOperandTy)
+ return nullptr;
+ if (BImag->getType() != ExpectedOperandTy)
+ return nullptr;
+
+ if (Phi->getType() != VTy && RealUser->getType() != VTy)
+ return nullptr;
+
+ NodePtr Node = identifyNode(AReal, AImag);
+
+ // In the case that a node was identified to figure out the rotation, ensure
+ // that trying to identify a node with AReal and AImag post-unwrap results in
+ // the same node
+ if (ANode && Node != ANode) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Identified node is different from previously identified node. "
+ "Unable to confidently generate a complex operation node\n");
+ return nullptr;
+ }
+
+ CN->addOperand(Node);
+ CN->addOperand(identifyNode(BReal, BImag));
+ CN->addOperand(identifyNode(Phi, RealUser));
+
+ return submitCompositeNode(CN);
+}
+
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifyPartialReduction(Value *R, Value *I) {
+ // Partial reductions don't support non-vector types, so check these first
+ if (!isa<VectorType>(R->getType()) || !isa<VectorType>(I->getType()))
+ return nullptr;
+
+ auto CommonUser =
+ findCommonBetweenCollections<Value *>(R->users(), I->users());
+ if (!CommonUser)
+ return nullptr;
+
+ auto *IInst = dyn_cast<IntrinsicInst>(*CommonUser);
+ if (!IInst || IInst->getIntrinsicID() !=
+ Intrinsic::experimental_vector_partial_reduce_add)
+ return nullptr;
+
+ if (NodePtr CN = identifyDotProduct(IInst))
+ return CN;
+
+ return nullptr;
+}
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifyNode(Value *R, Value *I) {
auto It = CachedResult.find({R, I});
if (It != CachedResult.end()) {
LLVM_DEBUG(dbgs() << " - Folding to existing node\n");
return It->second;
}
+ if (NodePtr CN = identifyPartialReduction(R, I))
+ return CN;
+
+ bool IsReduction = RealPHI == R && (!ImagPHI || ImagPHI == I);
+ if (!IsReduction && R->getType() != I->getType())
+ return nullptr;
+
if (NodePtr CN = identifySplat(R, I))
return CN;
@@ -1427,12 +1590,20 @@ bool ComplexDeinterleavingGraph::identifyNodes(Instruction *RootI) {
if (It != RootToNode.end()) {
auto RootNode = It->second;
assert(RootNode->Operation ==
- ComplexDeinterleavingOperation::ReductionOperation);
+ ComplexDeinterleavingOperation::ReductionOperation ||
+ RootNode->Operation ==
+ ComplexDeinterleavingOperation::ReductionSingle);
// Find out which part, Real or Imag, comes later, and only if we come to
// the latest part, add it to OrderedRoots.
auto *R = cast<Instruction>(RootNode->Real);
- auto *I = cast<Instruction>(RootNode->Imag);
- auto *ReplacementAnchor = R->comesBefore(I) ? I : R;
+ auto *I = RootNode->Imag ? cast<Instruction>(RootNode->Imag) : nullptr;
+
+ Instruction *ReplacementAnchor;
+ if (I)
+ ReplacementAnchor = R->comesBefore(I) ? I : R;
+ else
+ ReplacementAnchor = R;
+
if (ReplacementAnchor != RootI)
return false;
OrderedRoots.push_back(RootI);
@@ -1523,7 +1694,6 @@ void ComplexDeinterleavingGraph::identifyReductionNodes() {
for (size_t j = i + 1; j < OperationInstruction.size(); ++j) {
if (Processed[j])
continue;
-
auto *Real = OperationInstruction[i];
auto *Imag = OperationInstruction[j];
if (Real->getType() != Imag->getType())
@@ -1556,6 +1726,28 @@ void ComplexDeinterleavingGraph::identifyReductionNodes() {
break;
}
}
+
+ auto *Real = OperationInstruction[i];
+ // We want to check that we have 2 operands, but the function attributes
+ // being counted as operands bloats this value.
+ if (Real->getNumOperands() < 2)
+ continue;
+
+ RealPHI = ReductionInfo[Real].first;
+ ImagPHI = nullptr;
+ PHIsFound = false;
+ auto Node = identifyNode(Real->getOperand(0), Real->getOperand(1));
+ if (Node && PHIsFound) {
+ LLVM_DEBUG(
+ dbgs() << "Identified single reduction starting from instruction: "
+ << *Real << "/" << *ReductionInfo[Real].second << "\n");
+ Processed[i] = true;
+ auto RootNode = prepareCompositeNode(
+ ComplexDeinterleavingOperation::ReductionSingle, Real, nullptr);
+ RootNode->addOperand(Node);
+ RootToNode[Real] = RootNode;
+ submitCompositeNode(RootNode);
+ }
}
RealPHI = nullptr;
@@ -1563,6 +1755,24 @@ void ComplexDeinterleavingGraph::identifyReductionNodes() {
}
bool ComplexDeinterleavingGraph::checkNodes() {
+
+ bool FoundDeinterleaveNode = false;
+ for (NodePtr N : CompositeNodes) {
+ if (!N->areOperandsValid())
+ return false;
+ if (N->Operation == ComplexDeinterleavingOperation::Deinterleave)
+ FoundDeinterleaveNode = true;
+ }
+
+ // We need a deinterleave node in order to guarantee that we're working with
+ // complex numbers.
+ if (!FoundDeinterleaveNode) {
+ LLVM_DEBUG(
+ dbgs() << "Couldn't find a deinterleave node within the graph, cannot "
+ "guarantee safety during graph transformation.\n");
+ return false;
+ }
+
// Collect all instructions from roots to leaves
SmallPtrSet<Instruction *, 16> AllInstructions;
SmallVector<Instruction *, 8> Worklist;
@@ -1831,7 +2041,7 @@ ComplexDeinterleavingGraph::identifySplat(Value *R, Value *I) {
ComplexDeinterleavingGraph::NodePtr
ComplexDeinterleavingGraph::identifyPHINode(Instruction *Real,
Instruction *Imag) {
- if (Real != RealPHI || Imag != ImagPHI)
+ if (Real != RealPHI || (ImagPHI && Imag != ImagPHI))
return nullptr;
PHIsFound = true;
@@ -1926,6 +2136,16 @@ Value *ComplexDeinterleavingGraph::replaceNode(IRBuilderBase &Builder,
Value *ReplacementNode;
switch (Node->Operation) {
+ case ComplexDeinterleavingOperation::CDot: {
+ Value *Input0 = ReplaceOperandIfExist(Node, 0);
+ Value *Input1 = ReplaceOperandIfExist(Node, 1);
+ Value *Accumulator = ReplaceOperandIfExist(Node, 2);
+ assert(!Input1 || (Input0->getType() == Input1->getType() &&
+ "Node inputs need to be of the same type"));
+ ReplacementNode = TL->createComplexDeinterleavingIR(
+ Builder, Node->Operation, Node->Rotation, Input0, Input1, Accumulator);
+ break;
+ }
case ComplexDeinterleavingOperation::CAdd:
case ComplexDeinterleavingOperation::CMulPartial:
case ComplexDeinterleavingOperation::Symmetric: {
@@ -1969,13 +2189,18 @@ Value *ComplexDeinterleavingGraph::replaceNode(IRBuilderBase &Builder,
case ComplexDeinterleavingOperation::ReductionPHI: {
// If Operation is ReductionPHI, a new empty PHINode is created.
// It is filled later when the ReductionOperation is processed.
+ auto *OldPHI = cast<PHINode>(Node->Real);
auto *VTy = cast<VectorType>(Node->Real->getType());
auto *NewVTy = VectorType::getDoubleElementsVectorType(VTy);
auto *NewPHI = PHINode::Create(NewVTy, 0, "", BackEdge->getFirstNonPHIIt());
- OldToNewPHI[dyn_cast<PHINode>(Node->Real)] = NewPHI;
+ OldToNewPHI[OldPHI] = NewPHI;
ReplacementNode = NewPHI;
break;
}
+ case ComplexDeinterleavingOperation::ReductionSingle:
+ ReplacementNode = replaceNode(Builder, Node->Operands[0]);
+ processReductionSingle(ReplacementNode, Node);
+ break;
case ComplexDeinterleavingOperation::ReductionOperation:
ReplacementNode = replaceNode(Builder, Node->Operands[0]);
processReductionOperation(ReplacementNode, Node);
@@ -2000,6 +2225,38 @@ Value *ComplexDeinterleavingGraph::replaceNode(IRBuilderBase &Builder,
return ReplacementNode;
}
+void ComplexDeinterleavingGraph::processReductionSingle(
+ Value *OperationReplacement, RawNodePtr Node) {
+ auto *Real = cast<Instruction>(Node->Real);
+ auto *OldPHI = ReductionInfo[Real].first;
+ auto *NewPHI = OldToNewPHI[OldPHI];
+ auto *VTy = cast<VectorType>(Real->getType());
+ auto *NewVTy = VectorType::getDoubleElementsVectorType(VTy);
+
+ Value *Init = OldPHI->getIncomingValueForBlock(Incoming);
+
+ IRBuilder<> Builder(Incoming->getTerminator());
+
+ Value *NewInit = nullptr;
+ if (auto *C = dyn_cast<Constant>(Init)) {
+ if (C->isZeroValue())
+ NewInit = Constant::getNullValue(NewVTy);
+ }
+
+ if (!NewInit)
+ NewInit = Builder.CreateIntrinsic(Intrinsic::vector_interleave2, NewVTy,
+ {Init, Constant::getNullValue(VTy)});
+
+ NewPHI->addIncoming(NewInit, Incoming);
+ NewPHI->addIncoming(OperationReplacement, BackEdge);
+
+ auto *FinalReduction = ReductionInfo[Real].second;
+ Builder.SetInsertPoint(&*FinalReduction->getParent()->getFirstInsertionPt());
+
+ auto *AddReduce = Builder.CreateAddReduce(OperationReplacement);
+ FinalReduction->replaceAllUsesWith(AddReduce);
+}
+
void ComplexDeinterleavingGraph::processReductionOperation(
Value *OperationReplacement, RawNodePtr Node) {
auto *Real = cast<Instruction>(Node->Real);
@@ -2059,8 +2316,13 @@ void ComplexDeinterleavingGraph::replaceNodes() {
auto *RootImag = cast<Instruction>(RootNode->Imag);
ReductionInfo[RootReal].first->removeIncomingValue(BackEdge);
ReductionInfo[RootImag].first->removeIncomingValue(BackEdge);
- DeadInstrRoots.push_back(cast<Instruction>(RootReal));
- DeadInstrRoots.push_back(cast<Instruction>(RootImag));
+ DeadInstrRoots.push_back(RootReal);
+ DeadInstrRoots.push_back(RootImag);
+ } else if (RootNode->Operation ==
+ ComplexDeinterleavingOperation::ReductionSingle) {
+ auto *RootInst = cast<Instruction>(RootNode->Real);
+ ReductionInfo[RootInst].first->removeIncomingValue(BackEdge);
+ DeadInstrRoots.push_back(ReductionInfo[RootInst].second);
} else {
assert(R && "Unable to find replacement for RootInstruction");
DeadInstrRoots.push_back(RootInstruction);