Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp  41
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp      7
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp            6
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.h              5
4 files changed, 51 insertions, 8 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index d08f9b9..40e6400 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -50,6 +50,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetCallingConv.h"
@@ -104,7 +105,6 @@
#include <vector>
using namespace llvm;
-using namespace llvm::PatternMatch;
#define DEBUG_TYPE "aarch64-lower"
@@ -1174,6 +1174,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::VECTOR_DEINTERLEAVE);
+ setTargetDAGCombine(ISD::CTPOP);
// In case of strict alignment, avoid an excessive number of byte wide stores.
MaxStoresPerMemsetOptSize = 8;
@@ -11330,9 +11331,10 @@ SDValue AArch64TargetLowering::LowerMinMax(SDValue Op,
break;
}
+ // Note: This lowering only overrides NEON for v1i64 and v2i64, where we
+ // prefer using SVE if available.
if (VT.isScalableVector() ||
- useSVEForFixedLengthVectorVT(
- VT, /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors())) {
+ useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true)) {
switch (Opcode) {
default:
llvm_unreachable("Wrong instruction");
@@ -17554,6 +17556,7 @@ bool AArch64TargetLowering::optimizeExtendOrTruncateConversion(
// udot instruction.
if (SrcWidth * 4 <= DstWidth) {
if (all_of(I->users(), [&](auto *U) {
+ using namespace llvm::PatternMatch;
auto *SingleUser = cast<Instruction>(&*U);
if (match(SingleUser, m_c_Mul(m_Specific(I), m_SExt(m_Value()))))
return true;
@@ -17825,6 +17828,7 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
// into shift / and masks. For the moment we do this just for uitofp (not
// zext) to avoid issues with widening instructions.
if (Shuffles.size() == 4 && all_of(Shuffles, [](ShuffleVectorInst *SI) {
+ using namespace llvm::PatternMatch;
return SI->hasOneUse() && match(SI->user_back(), m_UIToFP(m_Value())) &&
SI->getType()->getScalarSizeInBits() * 4 ==
SI->user_back()->getType()->getScalarSizeInBits();
@@ -27841,6 +27845,35 @@ static SDValue performRNDRCombine(SDNode *N, SelectionDAG &DAG) {
{A, DAG.getZExtOrTrunc(B, DL, MVT::i1), A.getValue(2)}, DL);
}
+static SDValue performCTPOPCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG) {
+ using namespace llvm::SDPatternMatch;
+ if (!DCI.isBeforeLegalize())
+ return SDValue();
+
+ // ctpop(zext(bitcast(vector_mask))) -> neg(signed_reduce_add(vector_mask))
+ SDValue Mask;
+ if (!sd_match(N->getOperand(0), m_ZExt(m_BitCast(m_Value(Mask)))))
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ EVT MaskVT = Mask.getValueType();
+
+ if (VT.isVector() || !MaskVT.isFixedLengthVector() ||
+ MaskVT.getVectorElementType() != MVT::i1)
+ return SDValue();
+
+ EVT ReduceInVT =
+ EVT::getVectorVT(*DAG.getContext(), VT, MaskVT.getVectorElementCount());
+
+ SDLoc DL(N);
+ // Sign extend to best fit ZeroOrNegativeOneBooleanContent.
+ SDValue ExtMask = DAG.getNode(ISD::SIGN_EXTEND, DL, ReduceInVT, Mask);
+ SDValue NegPopCount = DAG.getNode(ISD::VECREDUCE_ADD, DL, VT, ExtMask);
+ return DAG.getNegative(NegPopCount, DL, VT);
+}
+
SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -28186,6 +28219,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performScalarToVectorCombine(N, DCI, DAG);
case ISD::SHL:
return performSHLCombine(N, DCI, DAG);
+ case ISD::CTPOP:
+ return performCTPOPCombine(N, DCI, DAG);
}
return SDValue();
}
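
The new performCTPOPCombine rewrites ctpop(zext(bitcast(<N x i1> mask))) into neg(vecreduce_add(sext(mask))): each true lane sign-extends to -1, so the signed reduction yields minus the population count and the final negate recovers it. Below is a minimal standalone C++ model of that equivalence, assuming an 8-lane i1 mask packed into a uint8_t; the helper name is illustrative, not an LLVM API.

// Scalar model of the ctpop -> neg(vecreduce_add(sext(mask))) combine.
#include <cassert>
#include <cstdint>

static int countMaskBitsViaReduce(uint8_t PackedMask) {
  int Sum = 0;
  for (int Lane = 0; Lane < 8; ++Lane) {
    // SIGN_EXTEND each i1 lane to 0 or -1 (ZeroOrNegativeOneBooleanContent).
    int Ext = ((PackedMask >> Lane) & 1) ? -1 : 0;
    Sum += Ext; // VECREDUCE_ADD over the sign-extended lanes.
  }
  return -Sum; // getNegative: -(-popcount) == popcount.
}

int main() {
  assert(countMaskBitsViaReduce(0b10110010) == 4); // four lanes set
  return 0;
}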
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 636e31c..bf9de0a 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -1583,7 +1583,10 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
if (!TII->isAddImmediate(*DeadMI, Reg))
continue;
LIS->RemoveMachineInstrFromMaps(*DeadMI);
+ Register AddReg = DeadMI->getOperand(1).getReg();
DeadMI->eraseFromParent();
+ if (AddReg.isVirtual())
+ LIS->shrinkToUses(&LIS->getInterval(AddReg));
}
}
}
@@ -1869,11 +1872,15 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
// Loop over the dead AVL values, and delete them now. This has
// to be outside the above loop to avoid invalidating iterators.
for (auto *MI : ToDelete) {
+ assert(MI->getOpcode() == RISCV::ADDI);
+ Register AddReg = MI->getOperand(1).getReg();
if (LIS) {
LIS->removeInterval(MI->getOperand(0).getReg());
LIS->RemoveMachineInstrFromMaps(*MI);
}
MI->eraseFromParent();
+ if (LIS && AddReg.isVirtual())
+ LIS->shrinkToUses(&LIS->getInterval(AddReg));
}
}
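
Both RISCVInsertVSETVLI hunks follow the same LiveIntervals maintenance pattern: after erasing a dead ADDI that supplied an AVL, the virtual register it read may have lost its last use, so its live interval is shrunk back to the remaining uses. A minimal sketch of that pattern, using a hypothetical helper (not LLVM API) that generalizes over all virtual-register uses of the erased instruction:

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

static void eraseDeadInstrAndShrink(MachineInstr &MI, LiveIntervals *LIS) {
  // Record virtual registers read by MI before it is destroyed.
  SmallVector<Register, 2> UsedVRegs;
  for (const MachineOperand &MO : MI.uses())
    if (MO.isReg() && MO.getReg().isVirtual())
      UsedVRegs.push_back(MO.getReg());

  if (LIS)
    LIS->RemoveMachineInstrFromMaps(MI);
  MI.eraseFromParent();

  // The erased instruction may have held the last use of each register, so
  // recompute the live intervals from the remaining uses.
  if (LIS)
    for (Register Reg : UsedVRegs)
      LIS->shrinkToUses(&LIS->getInterval(Reg));
}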
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4d44227b3..168e041 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -53442,7 +53442,8 @@ static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
}
SDValue NewStore =
- DAG.getStore(St->getChain(), DL, Res, NewPtr, St->getPointerInfo(),
+ DAG.getStore(St->getChain(), DL, Res, NewPtr,
+ MachinePointerInfo(St->getPointerInfo().getAddrSpace()),
Align(), St->getMemOperand()->getFlags());
// If there are other uses of StoredVal, replace with a new load of the
@@ -54639,7 +54640,8 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
SDValue NewPtr = DAG.getMemBasePlusOffset(
Ld->getBasePtr(), PtrByteOfs, DL, SDNodeFlags::NoUnsignedWrap);
SDValue NewLoad =
- DAG.getLoad(VT, DL, Ld->getChain(), NewPtr, Ld->getPointerInfo(),
+ DAG.getLoad(VT, DL, Ld->getChain(), NewPtr,
+ MachinePointerInfo(Ld->getPointerInfo().getAddrSpace()),
Align(), Ld->getMemOperand()->getFlags());
DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
return NewLoad;
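
In both X86 hunks the narrowed access is rebased onto NewPtr, a byte offset from the original base, so the original MachinePointerInfo (which describes the start of the original, wider access) would misdescribe the new load/store; only its address space is carried over. A minimal sketch of that conservative fallback, with a hypothetical helper name:

#include "llvm/CodeGen/MachineMemOperand.h"
using namespace llvm;

// Reusing Orig directly would claim the narrowed access still starts at the
// original IR value and offset, which no longer holds once the pointer has
// been advanced, so only the address space is kept.
static MachinePointerInfo conservativeInfoFor(const MachinePointerInfo &Orig) {
  return MachinePointerInfo(Orig.getAddrSpace());
}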
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index bbb03fb..3062e1c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -3212,9 +3212,8 @@ protected:
Alignment(getLoadStoreAlignment(&I)), Consecutive(Consecutive),
Reverse(Reverse) {
assert((Consecutive || !Reverse) && "Reverse implies consecutive");
- assert(isa<VPVectorEndPointerRecipe>(getAddr()) ||
- !Reverse &&
- "Reversed acccess without VPVectorEndPointerRecipe address?");
+ assert((isa<VPVectorEndPointerRecipe>(getAddr()) || !Reverse) &&
+ "Reversed acccess without VPVectorEndPointerRecipe address?");
}
public: