aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/CodeGen
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp2
-rw-r--r--llvm/lib/CodeGen/ExpandLargeFpConvert.cpp49
-rw-r--r--llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp5
-rw-r--r--llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp9
-rw-r--r--llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp2
-rw-r--r--llvm/lib/CodeGen/MIRPrinter.cpp20
-rw-r--r--llvm/lib/CodeGen/MachinePipeliner.cpp2
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp34
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp16
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp29
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp1
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp48
-rw-r--r--llvm/lib/CodeGen/ShrinkWrap.cpp11
13 files changed, 143 insertions, 85 deletions
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index a155387..293bb5a 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2927,7 +2927,7 @@ bool AsmPrinter::emitSpecialLLVMGlobal(const GlobalVariable *GV) {
return true;
}
- report_fatal_error("unknown special variable");
+ report_fatal_error("unknown special variable with appending linkage");
}
/// EmitLLVMUsedList - For targets that define a MAI::UsedDirective, mark each
diff --git a/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp b/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp
index 4ec966e..6213530 100644
--- a/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp
+++ b/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp
@@ -568,8 +568,29 @@ static void expandIToFP(Instruction *IToFP) {
IToFP->eraseFromParent();
}
+static void scalarize(Instruction *I, SmallVectorImpl<Instruction *> &Replace) {
+ VectorType *VTy = cast<FixedVectorType>(I->getType());
+
+ IRBuilder<> Builder(I);
+
+ unsigned NumElements = VTy->getElementCount().getFixedValue();
+ Value *Result = PoisonValue::get(VTy);
+ for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
+ Value *Ext = Builder.CreateExtractElement(I->getOperand(0), Idx);
+ Value *Cast = Builder.CreateCast(cast<CastInst>(I)->getOpcode(), Ext,
+ I->getType()->getScalarType());
+ Result = Builder.CreateInsertElement(Result, Cast, Idx);
+ if (isa<Instruction>(Cast))
+ Replace.push_back(cast<Instruction>(Cast));
+ }
+ I->replaceAllUsesWith(Result);
+ I->dropAllReferences();
+ I->eraseFromParent();
+}
+
static bool runImpl(Function &F, const TargetLowering &TLI) {
SmallVector<Instruction *, 4> Replace;
+ SmallVector<Instruction *, 4> ReplaceVector;
bool Modified = false;
unsigned MaxLegalFpConvertBitWidth =
@@ -584,29 +605,36 @@ static bool runImpl(Function &F, const TargetLowering &TLI) {
switch (I.getOpcode()) {
case Instruction::FPToUI:
case Instruction::FPToSI: {
- // TODO: This pass doesn't handle vectors.
- if (I.getOperand(0)->getType()->isVectorTy())
+ // TODO: This pass doesn't handle scalable vectors.
+ if (I.getOperand(0)->getType()->isScalableTy())
continue;
- auto *IntTy = dyn_cast<IntegerType>(I.getType());
+ auto *IntTy = dyn_cast<IntegerType>(I.getType()->getScalarType());
if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)
continue;
- Replace.push_back(&I);
+ if (I.getOperand(0)->getType()->isVectorTy())
+ ReplaceVector.push_back(&I);
+ else
+ Replace.push_back(&I);
Modified = true;
break;
}
case Instruction::UIToFP:
case Instruction::SIToFP: {
- // TODO: This pass doesn't handle vectors.
- if (I.getOperand(0)->getType()->isVectorTy())
+ // TODO: This pass doesn't handle scalable vectors.
+ if (I.getOperand(0)->getType()->isScalableTy())
continue;
- auto *IntTy = dyn_cast<IntegerType>(I.getOperand(0)->getType());
+ auto *IntTy =
+ dyn_cast<IntegerType>(I.getOperand(0)->getType()->getScalarType());
if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)
continue;
- Replace.push_back(&I);
+ if (I.getOperand(0)->getType()->isVectorTy())
+ ReplaceVector.push_back(&I);
+ else
+ Replace.push_back(&I);
Modified = true;
break;
}
@@ -615,6 +643,11 @@ static bool runImpl(Function &F, const TargetLowering &TLI) {
}
}
+ while (!ReplaceVector.empty()) {
+ Instruction *I = ReplaceVector.pop_back_val();
+ scalarize(I, Replace);
+ }
+
if (Replace.empty())
return false;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 5cf7a33..e53e35d 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -5201,10 +5201,7 @@ MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) {
// Calculate the multiplicative inverse modulo BW.
// 2^W requires W + 1 bits, so we have to extend and then truncate.
- unsigned W = Divisor.getBitWidth();
- APInt Factor = Divisor.zext(W + 1)
- .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
- .trunc(W);
+ APInt Factor = Divisor.multiplicativeInverse();
Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
return true;
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 797bbf7..95c6a35 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3006,6 +3006,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
Observer.changedInstr(MI);
return Legalized;
}
+ case TargetOpcode::G_SPLAT_VECTOR: {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
}
}
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index b8ba782..6b35caf 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -1278,7 +1278,7 @@ MachineIRBuilder::buildInstr(unsigned Opc, ArrayRef<DstOp> DstOps,
return DstTy.isScalar();
else
return DstTy.isVector() &&
- DstTy.getNumElements() == Op0Ty.getNumElements();
+ DstTy.getElementCount() == Op0Ty.getElementCount();
}() && "Type Mismatch");
break;
}
diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp
index bbc6d39..bf3aee6 100644
--- a/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -69,6 +69,8 @@ static cl::opt<bool> SimplifyMIR(
static cl::opt<bool> PrintLocations("mir-debug-loc", cl::Hidden, cl::init(true),
cl::desc("Print MIR debug-locations"));
+extern cl::opt<bool> WriteNewDbgInfoFormat;
+
namespace {
/// This structure describes how to print out stack object references.
@@ -986,29 +988,19 @@ void MIRFormatter::printIRValue(raw_ostream &OS, const Value &V,
}
void llvm::printMIR(raw_ostream &OS, const Module &M) {
- // RemoveDIs: as there's no textual form for DbgRecords yet, print debug-info
- // in dbg.value format.
- bool IsNewDbgInfoFormat = M.IsNewDbgInfoFormat;
- if (IsNewDbgInfoFormat)
- const_cast<Module &>(M).convertFromNewDbgValues();
+ ScopedDbgInfoFormatSetter FormatSetter(const_cast<Module &>(M),
+ WriteNewDbgInfoFormat);
yaml::Output Out(OS);
Out << const_cast<Module &>(M);
-
- if (IsNewDbgInfoFormat)
- const_cast<Module &>(M).convertToNewDbgValues();
}
void llvm::printMIR(raw_ostream &OS, const MachineFunction &MF) {
// RemoveDIs: as there's no textual form for DbgRecords yet, print debug-info
// in dbg.value format.
- bool IsNewDbgInfoFormat = MF.getFunction().IsNewDbgInfoFormat;
- if (IsNewDbgInfoFormat)
- const_cast<Function &>(MF.getFunction()).convertFromNewDbgValues();
+ ScopedDbgInfoFormatSetter FormatSetter(
+ const_cast<Function &>(MF.getFunction()), WriteNewDbgInfoFormat);
MIRPrinter Printer(OS);
Printer.print(MF);
-
- if (IsNewDbgInfoFormat)
- const_cast<Function &>(MF.getFunction()).convertToNewDbgValues();
}
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index eb42a78..b9c6765 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -1268,7 +1268,7 @@ private:
// Calculate the upper limit of each pressure set
void computePressureSetLimit(const RegisterClassInfo &RCI) {
for (unsigned PSet = 0; PSet < PSetNum; PSet++)
- PressureSetLimit[PSet] = RCI.getRegPressureSetLimit(PSet);
+ PressureSetLimit[PSet] = TRI->getRegPressureSetLimit(MF, PSet);
// We assume fixed registers, such as stack pointer, are already in use.
// Therefore subtracting the weight of the fixed registers from the limit of
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2f46b23..f20080c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1164,19 +1164,20 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
SDValue N01 = N0.getOperand(1);
if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N01))) {
+ SDNodeFlags NewFlags;
+ if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() &&
+ Flags.hasNoUnsignedWrap())
+ NewFlags.setNoUnsignedWrap(true);
+
if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1))) {
// Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))
- return DAG.getNode(Opc, DL, VT, N00, OpNode);
+ return DAG.getNode(Opc, DL, VT, N00, OpNode, NewFlags);
return SDValue();
}
if (TLI.isReassocProfitable(DAG, N0, N1)) {
// Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
// iff (op x, c1) has one use
- SDNodeFlags NewFlags;
- if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() &&
- Flags.hasNoUnsignedWrap())
- NewFlags.setNoUnsignedWrap(true);
SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, NewFlags);
return DAG.getNode(Opc, DL, VT, OpNode, N01, NewFlags);
}
@@ -3053,17 +3054,15 @@ static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
/// Helper for doing combines based on N0 and N1 being added to each other.
SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
- SDNode *LocReference) {
+ SDNode *LocReference) {
EVT VT = N0.getValueType();
SDLoc DL(LocReference);
// fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
- if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
- isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
+ SDValue Y, N;
+ if (sd_match(N1, m_Shl(m_Neg(m_Value(Y)), m_Value(N))))
return DAG.getNode(ISD::SUB, DL, VT, N0,
- DAG.getNode(ISD::SHL, DL, VT,
- N1.getOperand(0).getOperand(1),
- N1.getOperand(1)));
+ DAG.getNode(ISD::SHL, DL, VT, Y, N));
if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
return V;
@@ -12056,6 +12055,13 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
}
SDValue DAGCombiner::visitVP_SELECT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+
+ if (SDValue V = DAG.simplifySelect(N0, N1, N2))
+ return V;
+
if (SDValue V = foldBoolSelectToLogic<VPMatchContext>(N, DAG))
return V;
@@ -22260,12 +22266,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
return DAG.getUNDEF(ScalarVT);
- // extract_vector_elt(freeze(x)), idx -> freeze(extract_vector_elt(x)), idx
- if (VecOp.hasOneUse() && VecOp.getOpcode() == ISD::FREEZE) {
- return DAG.getFreeze(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
- VecOp.getOperand(0), Index));
- }
-
// extract_vector_elt (build_vector x, y), 1 -> y
if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 3332c02..a8b1f41 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -53,6 +53,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
SDValue R = SDValue();
switch (N->getOpcode()) {
+ // clang-format off
default:
#ifndef NDEBUG
dbgs() << "SoftenFloatResult #" << ResNo << ": ";
@@ -115,9 +116,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FPOWI:
case ISD::FLDEXP:
case ISD::STRICT_FLDEXP: R = SoftenFloatRes_ExpOp(N); break;
- case ISD::FFREXP:
- R = SoftenFloatRes_FFREXP(N);
- break;
+ case ISD::FFREXP: R = SoftenFloatRes_FFREXP(N); break;
case ISD::STRICT_FREM:
case ISD::FREM: R = SoftenFloatRes_FREM(N); break;
case ISD::STRICT_FRINT:
@@ -150,14 +149,11 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::VECREDUCE_FMIN:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMAXIMUM:
- case ISD::VECREDUCE_FMINIMUM:
- R = SoftenFloatRes_VECREDUCE(N);
- break;
+ case ISD::VECREDUCE_FMINIMUM: R = SoftenFloatRes_VECREDUCE(N); break;
case ISD::VECREDUCE_SEQ_FADD:
- case ISD::VECREDUCE_SEQ_FMUL:
- R = SoftenFloatRes_VECREDUCE_SEQ(N);
- break;
- }
+ case ISD::VECREDUCE_SEQ_FMUL: R = SoftenFloatRes_VECREDUCE_SEQ(N); break;
+ // clang-format on
+ }
// If R is null, the sub-method took care of registering the result.
if (R.getNode()) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 8c543ae..1dd0fa4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5149,6 +5149,17 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
case ISD::OR:
return ConsiderFlags && Op->getFlags().hasDisjoint();
+ case ISD::SCALAR_TO_VECTOR:
+ // Check if we demand any upper (undef) elements.
+ return !PoisonOnly && DemandedElts.ugt(1);
+
+ case ISD::EXTRACT_VECTOR_ELT: {
+ // Ensure that the element index is in bounds.
+ EVT VecVT = Op.getOperand(0).getValueType();
+ KnownBits KnownIdx = computeKnownBits(Op.getOperand(1), Depth + 1);
+ return KnownIdx.getMaxValue().uge(VecVT.getVectorMinNumElements());
+ }
+
case ISD::INSERT_VECTOR_ELT:{
// Ensure that the element index is in bounds.
EVT VecVT = Op.getOperand(0).getValueType();
@@ -11545,30 +11556,32 @@ bool llvm::isNeutralConstant(unsigned Opcode, SDNodeFlags Flags, SDValue V,
unsigned OperandNo) {
// NOTE: The cases should match with IR's ConstantExpr::getBinOpIdentity().
// TODO: Target-specific opcodes could be added.
- if (auto *Const = isConstOrConstSplat(V)) {
+ if (auto *ConstV = isConstOrConstSplat(V, /*AllowUndefs*/ false,
+ /*AllowTruncation*/ true)) {
+ APInt Const = ConstV->getAPIntValue().trunc(V.getScalarValueSizeInBits());
switch (Opcode) {
case ISD::ADD:
case ISD::OR:
case ISD::XOR:
case ISD::UMAX:
- return Const->isZero();
+ return Const.isZero();
case ISD::MUL:
- return Const->isOne();
+ return Const.isOne();
case ISD::AND:
case ISD::UMIN:
- return Const->isAllOnes();
+ return Const.isAllOnes();
case ISD::SMAX:
- return Const->isMinSignedValue();
+ return Const.isMinSignedValue();
case ISD::SMIN:
- return Const->isMaxSignedValue();
+ return Const.isMaxSignedValue();
case ISD::SUB:
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
- return OperandNo == 1 && Const->isZero();
+ return OperandNo == 1 && Const.isZero();
case ISD::UDIV:
case ISD::SDIV:
- return OperandNo == 1 && Const->isOne();
+ return OperandNo == 1 && Const.isOne();
}
} else if (auto *ConstFP = isConstOrConstSplatFP(V)) {
switch (Opcode) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 20375a0..6691aa4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -456,6 +456,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::CONVERGENCECTRL_ANCHOR: return "convergencectrl_anchor";
case ISD::CONVERGENCECTRL_ENTRY: return "convergencectrl_entry";
case ISD::CONVERGENCECTRL_LOOP: return "convergencectrl_loop";
+ case ISD::CONVERGENCECTRL_GLUE: return "convergencectrl_glue";
// Bit manipulation
case ISD::ABS: return "abs";
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 962f0d9..409d66a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -742,6 +742,13 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
break;
}
+ case ISD::FREEZE: {
+ SDValue N0 = Op.getOperand(0);
+ if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
+ /*PoisonOnly=*/false))
+ return N0;
+ break;
+ }
case ISD::AND: {
LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
@@ -3184,6 +3191,20 @@ bool TargetLowering::SimplifyDemandedVectorElts(
}
break;
}
+ case ISD::FREEZE: {
+ SDValue N0 = Op.getOperand(0);
+ if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
+ /*PoisonOnly=*/false))
+ return TLO.CombineTo(Op, N0);
+
+ // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
+ // freeze(op(x, ...)) -> op(freeze(x), ...).
+ if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT,
+ TLO.DAG.getFreeze(N0.getOperand(0))));
+ break;
+ }
case ISD::BUILD_VECTOR: {
// Check all elements and simplify any unused elements with UNDEF.
if (!DemandedElts.isAllOnes()) {
@@ -3524,6 +3545,10 @@ bool TargetLowering::SimplifyDemandedVectorElts(
}
[[fallthrough]];
}
+ case ISD::AVGCEILS:
+ case ISD::AVGCEILU:
+ case ISD::AVGFLOORS:
+ case ISD::AVGFLOORU:
case ISD::OR:
case ISD::XOR:
case ISD::SUB:
@@ -6046,11 +6071,7 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
Divisor.ashrInPlace(Shift);
UseSRA = true;
}
- // Calculate the multiplicative inverse, using Newton's method.
- APInt t;
- APInt Factor = Divisor;
- while ((t = Divisor * Factor) != 1)
- Factor *= APInt(Divisor.getBitWidth(), 2) - t;
+ APInt Factor = Divisor.multiplicativeInverse();
Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
Factors.push_back(DAG.getConstant(Factor, dl, SVT));
return true;
@@ -6639,10 +6660,7 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
// P = inv(D0, 2^W)
// 2^W requires W + 1 bits, so we have to extend and then truncate.
unsigned W = D.getBitWidth();
- APInt P = D0.zext(W + 1)
- .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
- .trunc(W);
- assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
+ APInt P = D0.multiplicativeInverse();
assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
// Q = floor((2^W - 1) u/ D)
@@ -6897,10 +6915,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
// P = inv(D0, 2^W)
// 2^W requires W + 1 bits, so we have to extend and then truncate.
unsigned W = D.getBitWidth();
- APInt P = D0.zext(W + 1)
- .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
- .trunc(W);
- assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
+ APInt P = D0.multiplicativeInverse();
assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
// A = floor((2^(W - 1) - 1) / D0) & -2^K
@@ -7626,7 +7641,7 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
//
// For division, we can compute the remainder using the algorithm described
// above, subtract it from the dividend to get an exact multiple of Constant.
-// Then multiply that extact multiply by the multiplicative inverse modulo
+// Then multiply that exact multiply by the multiplicative inverse modulo
// (1 << (BitWidth / 2)) to get the quotient.
// If Constant is even, we can shift right the dividend and the divisor by the
@@ -7761,10 +7776,7 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
// Multiply by the multiplicative inverse of the divisor modulo
// (1 << BitWidth).
- APInt Mod = APInt::getSignedMinValue(BitWidth + 1);
- APInt MulFactor = Divisor.zext(BitWidth + 1);
- MulFactor = MulFactor.multiplicativeInverse(Mod);
- MulFactor = MulFactor.trunc(BitWidth);
+ APInt MulFactor = Divisor.multiplicativeInverse();
SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
DAG.getConstant(MulFactor, dl, VT));
diff --git a/llvm/lib/CodeGen/ShrinkWrap.cpp b/llvm/lib/CodeGen/ShrinkWrap.cpp
index ab57d08..a4b2299 100644
--- a/llvm/lib/CodeGen/ShrinkWrap.cpp
+++ b/llvm/lib/CodeGen/ShrinkWrap.cpp
@@ -161,9 +161,11 @@ class ShrinkWrap : public MachineFunctionPass {
/// Current MachineFunction.
MachineFunction *MachineFunc = nullptr;
- /// Is `true` for block numbers where we can guarantee no stack access
- /// or computation of stack-relative addresses on any CFG path including
- /// the block itself.
+ /// Is `true` for the block numbers where we assume possible stack accesses
+ /// or computation of stack-relative addresses on any CFG path including the
+ /// block itself. Is `false` for basic blocks where we can guarantee the
+ /// opposite. False positives won't lead to incorrect analysis results,
+ /// therefore this approach is fair.
BitVector StackAddressUsedBlockInfo;
/// Check if \p MI uses or defines a callee-saved register or
@@ -948,6 +950,9 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
+ // Initially, conservatively assume that stack addresses can be used in each
+ // basic block and change the state only for those basic blocks for which we
+ // were able to prove the opposite.
StackAddressUsedBlockInfo.resize(MF.getNumBlockIDs(), true);
bool HasCandidate = performShrinkWrapping(RPOT, RS.get());
StackAddressUsedBlockInfo.clear();