diff options
author | jofrn <jofernau@amd.com> | 2025-01-31 13:12:56 -0500 |
---|---|---|
committer | jofernau_amdeng <joe.fernau@amd.com> | 2025-05-27 13:34:31 -0400 |
commit | 7ba3e69a759f59bf746cb14640ea8ea426fa09fd (patch) | |
tree | 61708f8abb299cfc69a0477d225431e586acf0fa | |
parent | f91634795e4dbfd6e081f0b096b872411051ff0f (diff) | |
download | llvm-users/jofrn/spr/main/b83937a8.zip llvm-users/jofrn/spr/main/b83937a8.tar.gz llvm-users/jofrn/spr/main/b83937a8.tar.bz2 |
[SelectionDAG][X86] Remove unused elements from atomic vector. (branch: users/jofrn/spr/main/b83937a8)
After splitting an atomic vector load, all elements are created even
when some of them are unused. The two components of the split value
must be found by looking at its upper and lower halves.
This change extends EltsFromConsecutiveLoads
to understand AtomicSDNode so that the unused elements can be removed.
commit-id:b83937a8
-rw-r--r-- | llvm/include/llvm/CodeGen/SelectionDAG.h | 2 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 61 |
3 files changed, 51 insertions, 14 deletions
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index 87b6914..40550d9 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1873,7 +1873,7 @@ public: /// chain to the token factor. This ensures that the new memory node will have /// the same relative memory dependency position as the old load. Returns the /// new merged load chain. - SDValue makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, SDValue NewMemOp); + SDValue makeEquivalentMemoryOrdering(MemSDNode *OldLoad, SDValue NewMemOp); /// Topological-sort the AllNodes list and a /// assign a unique node id for each node in the DAG based on their diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index e6a7d09..1c1445f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -12236,7 +12236,7 @@ SDValue SelectionDAG::makeEquivalentMemoryOrdering(SDValue OldChain, return TokenFactor; } -SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, +SDValue SelectionDAG::makeEquivalentMemoryOrdering(MemSDNode *OldLoad, SDValue NewMemOp) { assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node"); SDValue OldChain = SDValue(OldLoad, 1); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1fc50a3..5ce8d83 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -7193,15 +7193,19 @@ static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl, } // Recurse to find a LoadSDNode source and the accumulated ByteOffest. 
-static bool findEltLoadSrc(SDValue Elt, LoadSDNode *&Ld, int64_t &ByteOffset) { - if (ISD::isNON_EXTLoad(Elt.getNode())) { - auto *BaseLd = cast<LoadSDNode>(Elt); - if (!BaseLd->isSimple()) - return false; +static bool findEltLoadSrc(SDValue Elt, MemSDNode *&Ld, int64_t &ByteOffset) { + if (auto *BaseLd = dyn_cast<AtomicSDNode>(Elt)) { Ld = BaseLd; ByteOffset = 0; return true; - } + } else if (auto *BaseLd = dyn_cast<LoadSDNode>(Elt)) + if (ISD::isNON_EXTLoad(Elt.getNode())) { + if (!BaseLd->isSimple()) + return false; + Ld = BaseLd; + ByteOffset = 0; + return true; + } switch (Elt.getOpcode()) { case ISD::BITCAST: @@ -7254,7 +7258,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts, APInt ZeroMask = APInt::getZero(NumElems); APInt UndefMask = APInt::getZero(NumElems); - SmallVector<LoadSDNode*, 8> Loads(NumElems, nullptr); + SmallVector<MemSDNode *, 8> Loads(NumElems, nullptr); SmallVector<int64_t, 8> ByteOffsets(NumElems, 0); // For each element in the initializer, see if we've found a load, zero or an @@ -7304,7 +7308,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts, EVT EltBaseVT = EltBase.getValueType(); assert(EltBaseVT.getSizeInBits() == EltBaseVT.getStoreSizeInBits() && "Register/Memory size mismatch"); - LoadSDNode *LDBase = Loads[FirstLoadedElt]; + MemSDNode *LDBase = Loads[FirstLoadedElt]; assert(LDBase && "Did not find base load for merging consecutive loads"); unsigned BaseSizeInBits = EltBaseVT.getStoreSizeInBits(); unsigned BaseSizeInBytes = BaseSizeInBits / 8; @@ -7318,15 +7322,18 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts, // Check to see if the element's load is consecutive to the base load // or offset from a previous (already checked) load. 
- auto CheckConsecutiveLoad = [&](LoadSDNode *Base, int EltIdx) { - LoadSDNode *Ld = Loads[EltIdx]; + auto CheckConsecutiveLoad = [&](MemSDNode *Base, int EltIdx) { + MemSDNode *Ld = Loads[EltIdx]; int64_t ByteOffset = ByteOffsets[EltIdx]; if (ByteOffset && (ByteOffset % BaseSizeInBytes) == 0) { int64_t BaseIdx = EltIdx - (ByteOffset / BaseSizeInBytes); return (0 <= BaseIdx && BaseIdx < (int)NumElems && LoadMask[BaseIdx] && Loads[BaseIdx] == Ld && ByteOffsets[BaseIdx] == 0); } - return DAG.areNonVolatileConsecutiveLoads(Ld, Base, BaseSizeInBytes, + auto *L = dyn_cast<LoadSDNode>(Ld); + auto *B = dyn_cast<LoadSDNode>(Base); + return L && B && + DAG.areNonVolatileConsecutiveLoads(L, B, BaseSizeInBytes, EltIdx - FirstLoadedElt); }; @@ -7347,7 +7354,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts, } } - auto CreateLoad = [&DAG, &DL, &Loads](EVT VT, LoadSDNode *LDBase) { + auto CreateLoad = [&DAG, &DL, &Loads](EVT VT, MemSDNode *LDBase) { auto MMOFlags = LDBase->getMemOperand()->getFlags(); assert(LDBase->isSimple() && "Cannot merge volatile or atomic loads."); @@ -60539,6 +60546,35 @@ static SDValue combineINTRINSIC_VOID(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static SDValue combineVZEXT_LOAD(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI) { + // Find the TokenFactor to locate the associated AtomicLoad. + SDNode *ALD = nullptr; + for (auto &TF : N->uses()) + if (TF.getUser()->getOpcode() == ISD::TokenFactor) { + SDValue L = TF.getUser()->getOperand(0); + SDValue R = TF.getUser()->getOperand(1); + if (L.getNode() == N) + ALD = R.getNode(); + else if (R.getNode() == N) + ALD = L.getNode(); + } + + if (!ALD) + return SDValue(); + if (!isa<AtomicSDNode>(ALD)) + return SDValue(); + + // Replace the VZEXT_LOAD with the AtomicLoad. 
+ SDLoc dl(N); + SDValue SV = + DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, + N->getValueType(0).changeTypeToInteger(), SDValue(ALD, 0)); + SDValue BC = DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), SV); + BC = DCI.CombineTo(N, BC, SDValue(ALD, 1)); + return BC; +} + SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -60735,6 +60771,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::INTRINSIC_VOID: return combineINTRINSIC_VOID(N, DAG, DCI); case ISD::FP_TO_SINT_SAT: case ISD::FP_TO_UINT_SAT: return combineFP_TO_xINT_SAT(N, DAG, Subtarget); + case X86ISD::VZEXT_LOAD: return combineVZEXT_LOAD(N, DAG, DCI); // clang-format on } |