diff options
Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r-- | llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/CodeGen/ExpandLargeFpConvert.cpp | 49 | ||||
-rw-r--r-- | llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 5 | ||||
-rw-r--r-- | llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 9 | ||||
-rw-r--r-- | llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/CodeGen/MIRPrinter.cpp | 20 | ||||
-rw-r--r-- | llvm/lib/CodeGen/MachinePipeliner.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 34 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 16 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 29 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 1 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 48 | ||||
-rw-r--r-- | llvm/lib/CodeGen/ShrinkWrap.cpp | 11 |
13 files changed, 143 insertions, 85 deletions
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index a155387..293bb5a 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -2927,7 +2927,7 @@ bool AsmPrinter::emitSpecialLLVMGlobal(const GlobalVariable *GV) { return true; } - report_fatal_error("unknown special variable"); + report_fatal_error("unknown special variable with appending linkage"); } /// EmitLLVMUsedList - For targets that define a MAI::UsedDirective, mark each diff --git a/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp b/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp index 4ec966e..6213530 100644 --- a/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp +++ b/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp @@ -568,8 +568,29 @@ static void expandIToFP(Instruction *IToFP) { IToFP->eraseFromParent(); } +static void scalarize(Instruction *I, SmallVectorImpl<Instruction *> &Replace) { + VectorType *VTy = cast<FixedVectorType>(I->getType()); + + IRBuilder<> Builder(I); + + unsigned NumElements = VTy->getElementCount().getFixedValue(); + Value *Result = PoisonValue::get(VTy); + for (unsigned Idx = 0; Idx < NumElements; ++Idx) { + Value *Ext = Builder.CreateExtractElement(I->getOperand(0), Idx); + Value *Cast = Builder.CreateCast(cast<CastInst>(I)->getOpcode(), Ext, + I->getType()->getScalarType()); + Result = Builder.CreateInsertElement(Result, Cast, Idx); + if (isa<Instruction>(Cast)) + Replace.push_back(cast<Instruction>(Cast)); + } + I->replaceAllUsesWith(Result); + I->dropAllReferences(); + I->eraseFromParent(); +} + static bool runImpl(Function &F, const TargetLowering &TLI) { SmallVector<Instruction *, 4> Replace; + SmallVector<Instruction *, 4> ReplaceVector; bool Modified = false; unsigned MaxLegalFpConvertBitWidth = @@ -584,29 +605,36 @@ static bool runImpl(Function &F, const TargetLowering &TLI) { switch (I.getOpcode()) { case Instruction::FPToUI: case Instruction::FPToSI: { - // TODO: This pass doesn't handle vectors. - if (I.getOperand(0)->getType()->isVectorTy()) + // TODO: This pass doesn't handle scalable vectors. + if (I.getOperand(0)->getType()->isScalableTy()) continue; - auto *IntTy = dyn_cast<IntegerType>(I.getType()); + auto *IntTy = dyn_cast<IntegerType>(I.getType()->getScalarType()); if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth) continue; - Replace.push_back(&I); + if (I.getOperand(0)->getType()->isVectorTy()) + ReplaceVector.push_back(&I); + else + Replace.push_back(&I); Modified = true; break; } case Instruction::UIToFP: case Instruction::SIToFP: { - // TODO: This pass doesn't handle vectors. - if (I.getOperand(0)->getType()->isVectorTy()) + // TODO: This pass doesn't handle scalable vectors. + if (I.getOperand(0)->getType()->isScalableTy()) continue; - auto *IntTy = dyn_cast<IntegerType>(I.getOperand(0)->getType()); + auto *IntTy = + dyn_cast<IntegerType>(I.getOperand(0)->getType()->getScalarType()); if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth) continue; - Replace.push_back(&I); + if (I.getOperand(0)->getType()->isVectorTy()) + ReplaceVector.push_back(&I); + else + Replace.push_back(&I); Modified = true; break; } @@ -615,6 +643,11 @@ static bool runImpl(Function &F, const TargetLowering &TLI) { } } + while (!ReplaceVector.empty()) { + Instruction *I = ReplaceVector.pop_back_val(); + scalarize(I, Replace); + } + if (Replace.empty()) return false; diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 5cf7a33..e53e35d 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -5201,10 +5201,7 @@ MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) { // Calculate the multiplicative inverse modulo BW. // 2^W requires W + 1 bits, so we have to extend and then truncate. - unsigned W = Divisor.getBitWidth(); - APInt Factor = Divisor.zext(W + 1) - .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) - .trunc(W); + APInt Factor = Divisor.multiplicativeInverse(); Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0)); Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0)); return true; diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 797bbf7..95c6a35 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -3006,6 +3006,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { Observer.changedInstr(MI); return Legalized; } + case TargetOpcode::G_SPLAT_VECTOR: { + if (TypeIdx != 1) + return UnableToLegalize; + + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); + Observer.changedInstr(MI); + return Legalized; + } } } diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index b8ba782..6b35caf 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -1278,7 +1278,7 @@ MachineIRBuilder::buildInstr(unsigned Opc, ArrayRef<DstOp> DstOps, return DstTy.isScalar(); else return DstTy.isVector() && - DstTy.getNumElements() == Op0Ty.getNumElements(); + DstTy.getElementCount() == Op0Ty.getElementCount(); }() && "Type Mismatch"); break; } diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp index bbc6d39..bf3aee6 100644 --- a/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/llvm/lib/CodeGen/MIRPrinter.cpp @@ -69,6 +69,8 @@ static cl::opt<bool> SimplifyMIR( static cl::opt<bool> PrintLocations("mir-debug-loc", cl::Hidden, cl::init(true), cl::desc("Print MIR debug-locations")); +extern cl::opt<bool> WriteNewDbgInfoFormat; + namespace { /// This structure describes how to print out stack object references. @@ -986,29 +988,19 @@ void MIRFormatter::printIRValue(raw_ostream &OS, const Value &V, } void llvm::printMIR(raw_ostream &OS, const Module &M) { - // RemoveDIs: as there's no textual form for DbgRecords yet, print debug-info - // in dbg.value format. - bool IsNewDbgInfoFormat = M.IsNewDbgInfoFormat; - if (IsNewDbgInfoFormat) - const_cast<Module &>(M).convertFromNewDbgValues(); + ScopedDbgInfoFormatSetter FormatSetter(const_cast<Module &>(M), + WriteNewDbgInfoFormat); yaml::Output Out(OS); Out << const_cast<Module &>(M); - - if (IsNewDbgInfoFormat) - const_cast<Module &>(M).convertToNewDbgValues(); } void llvm::printMIR(raw_ostream &OS, const MachineFunction &MF) { // RemoveDIs: as there's no textual form for DbgRecords yet, print debug-info // in dbg.value format. - bool IsNewDbgInfoFormat = MF.getFunction().IsNewDbgInfoFormat; - if (IsNewDbgInfoFormat) - const_cast<Function &>(MF.getFunction()).convertFromNewDbgValues(); + ScopedDbgInfoFormatSetter FormatSetter( + const_cast<Function &>(MF.getFunction()), WriteNewDbgInfoFormat); MIRPrinter Printer(OS); Printer.print(MF); - - if (IsNewDbgInfoFormat) - const_cast<Function &>(MF.getFunction()).convertToNewDbgValues(); } diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index eb42a78..b9c6765 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -1268,7 +1268,7 @@ private: // Calculate the upper limit of each pressure set void computePressureSetLimit(const RegisterClassInfo &RCI) { for (unsigned PSet = 0; PSet < PSetNum; PSet++) - PressureSetLimit[PSet] = RCI.getRegPressureSetLimit(PSet); + PressureSetLimit[PSet] = TRI->getRegPressureSetLimit(MF, PSet); // We assume fixed registers, such as stack pointer, are already in use. // Therefore subtracting the weight of the fixed registers from the limit of diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 2f46b23..f20080c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1164,19 +1164,20 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N01 = N0.getOperand(1); if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N01))) { + SDNodeFlags NewFlags; + if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() && + Flags.hasNoUnsignedWrap()) + NewFlags.setNoUnsignedWrap(true); + if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1))) { // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2)) if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1})) - return DAG.getNode(Opc, DL, VT, N00, OpNode); + return DAG.getNode(Opc, DL, VT, N00, OpNode, NewFlags); return SDValue(); } if (TLI.isReassocProfitable(DAG, N0, N1)) { // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1) // iff (op x, c1) has one use - SDNodeFlags NewFlags; - if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() && - Flags.hasNoUnsignedWrap()) - NewFlags.setNoUnsignedWrap(true); SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, NewFlags); return DAG.getNode(Opc, DL, VT, OpNode, N01, NewFlags); } @@ -3053,17 +3054,15 @@ static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1, /// Helper for doing combines based on N0 and N1 being added to each other. SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1, - SDNode *LocReference) { + SDNode *LocReference) { EVT VT = N0.getValueType(); SDLoc DL(LocReference); // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n)) - if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB && - isNullOrNullSplat(N1.getOperand(0).getOperand(0))) + SDValue Y, N; + if (sd_match(N1, m_Shl(m_Neg(m_Value(Y)), m_Value(N)))) return DAG.getNode(ISD::SUB, DL, VT, N0, - DAG.getNode(ISD::SHL, DL, VT, - N1.getOperand(0).getOperand(1), - N1.getOperand(1))); + DAG.getNode(ISD::SHL, DL, VT, Y, N)); if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL)) return V; @@ -12056,6 +12055,13 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) { } SDValue DAGCombiner::visitVP_SELECT(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + + if (SDValue V = DAG.simplifySelect(N0, N1, N2)) + return V; + if (SDValue V = foldBoolSelectToLogic<VPMatchContext>(N, DAG)) return V; @@ -22260,12 +22266,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { IndexC->getAPIntValue().uge(VecVT.getVectorNumElements())) return DAG.getUNDEF(ScalarVT); - // extract_vector_elt(freeze(x)), idx -> freeze(extract_vector_elt(x)), idx - if (VecOp.hasOneUse() && VecOp.getOpcode() == ISD::FREEZE) { - return DAG.getFreeze(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, - VecOp.getOperand(0), Index)); - } - // extract_vector_elt (build_vector x, y), 1 -> y if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) || VecOp.getOpcode() == ISD::SPLAT_VECTOR) && diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 3332c02..a8b1f41 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -53,6 +53,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { SDValue R = SDValue(); switch (N->getOpcode()) { + // clang-format off default: #ifndef NDEBUG dbgs() << "SoftenFloatResult #" << ResNo << ": "; @@ -115,9 +116,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FPOWI: case ISD::FLDEXP: case ISD::STRICT_FLDEXP: R = SoftenFloatRes_ExpOp(N); break; - case ISD::FFREXP: - R = SoftenFloatRes_FFREXP(N); - break; + case ISD::FFREXP: R = SoftenFloatRes_FFREXP(N); break; case ISD::STRICT_FREM: case ISD::FREM: R = SoftenFloatRes_FREM(N); break; case ISD::STRICT_FRINT: @@ -150,14 +149,11 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::VECREDUCE_FMIN: case ISD::VECREDUCE_FMAX: case ISD::VECREDUCE_FMAXIMUM: - case ISD::VECREDUCE_FMINIMUM: - R = SoftenFloatRes_VECREDUCE(N); - break; + case ISD::VECREDUCE_FMINIMUM: R = SoftenFloatRes_VECREDUCE(N); break; case ISD::VECREDUCE_SEQ_FADD: - case ISD::VECREDUCE_SEQ_FMUL: - R = SoftenFloatRes_VECREDUCE_SEQ(N); - break; - } + case ISD::VECREDUCE_SEQ_FMUL: R = SoftenFloatRes_VECREDUCE_SEQ(N); break; + // clang-format on + } // If R is null, the sub-method took care of registering the result. if (R.getNode()) { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 8c543ae..1dd0fa4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5149,6 +5149,17 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, case ISD::OR: return ConsiderFlags && Op->getFlags().hasDisjoint(); + case ISD::SCALAR_TO_VECTOR: + // Check if we demand any upper (undef) elements. + return !PoisonOnly && DemandedElts.ugt(1); + + case ISD::EXTRACT_VECTOR_ELT: { + // Ensure that the element index is in bounds. + EVT VecVT = Op.getOperand(0).getValueType(); + KnownBits KnownIdx = computeKnownBits(Op.getOperand(1), Depth + 1); + return KnownIdx.getMaxValue().uge(VecVT.getVectorMinNumElements()); + } + case ISD::INSERT_VECTOR_ELT:{ // Ensure that the element index is in bounds. EVT VecVT = Op.getOperand(0).getValueType(); @@ -11545,30 +11556,32 @@ bool llvm::isNeutralConstant(unsigned Opcode, SDNodeFlags Flags, SDValue V, unsigned OperandNo) { // NOTE: The cases should match with IR's ConstantExpr::getBinOpIdentity(). // TODO: Target-specific opcodes could be added. - if (auto *Const = isConstOrConstSplat(V)) { + if (auto *ConstV = isConstOrConstSplat(V, /*AllowUndefs*/ false, + /*AllowTruncation*/ true)) { + APInt Const = ConstV->getAPIntValue().trunc(V.getScalarValueSizeInBits()); switch (Opcode) { case ISD::ADD: case ISD::OR: case ISD::XOR: case ISD::UMAX: - return Const->isZero(); + return Const.isZero(); case ISD::MUL: - return Const->isOne(); + return Const.isOne(); case ISD::AND: case ISD::UMIN: - return Const->isAllOnes(); + return Const.isAllOnes(); case ISD::SMAX: - return Const->isMinSignedValue(); + return Const.isMinSignedValue(); case ISD::SMIN: - return Const->isMaxSignedValue(); + return Const.isMaxSignedValue(); case ISD::SUB: case ISD::SHL: case ISD::SRA: case ISD::SRL: - return OperandNo == 1 && Const->isZero(); + return OperandNo == 1 && Const.isZero(); case ISD::UDIV: case ISD::SDIV: - return OperandNo == 1 && Const->isOne(); + return OperandNo == 1 && Const.isOne(); } } else if (auto *ConstFP = isConstOrConstSplatFP(V)) { switch (Opcode) { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 20375a0..6691aa4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -456,6 +456,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::CONVERGENCECTRL_ANCHOR: return "convergencectrl_anchor"; case ISD::CONVERGENCECTRL_ENTRY: return "convergencectrl_entry"; case ISD::CONVERGENCECTRL_LOOP: return "convergencectrl_loop"; + case ISD::CONVERGENCECTRL_GLUE: return "convergencectrl_glue"; // Bit manipulation case ISD::ABS: return "abs"; diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 962f0d9..409d66a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -742,6 +742,13 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( break; } + case ISD::FREEZE: { + SDValue N0 = Op.getOperand(0); + if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts, + /*PoisonOnly=*/false)) + return N0; + break; + } case ISD::AND: { LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); @@ -3184,6 +3191,20 @@ bool TargetLowering::SimplifyDemandedVectorElts( } break; } + case ISD::FREEZE: { + SDValue N0 = Op.getOperand(0); + if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts, + /*PoisonOnly=*/false)) + return TLO.CombineTo(Op, N0); + + // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE + // freeze(op(x, ...)) -> op(freeze(x), ...). + if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1) + return TLO.CombineTo( + Op, TLO.DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, + TLO.DAG.getFreeze(N0.getOperand(0)))); + break; + } case ISD::BUILD_VECTOR: { // Check all elements and simplify any unused elements with UNDEF. if (!DemandedElts.isAllOnes()) { @@ -3524,6 +3545,10 @@ bool TargetLowering::SimplifyDemandedVectorElts( } [[fallthrough]]; } + case ISD::AVGCEILS: + case ISD::AVGCEILU: + case ISD::AVGFLOORS: + case ISD::AVGFLOORU: case ISD::OR: case ISD::XOR: case ISD::SUB: @@ -6046,11 +6071,7 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, Divisor.ashrInPlace(Shift); UseSRA = true; } - // Calculate the multiplicative inverse, using Newton's method. - APInt t; - APInt Factor = Divisor; - while ((t = Divisor * Factor) != 1) - Factor *= APInt(Divisor.getBitWidth(), 2) - t; + APInt Factor = Divisor.multiplicativeInverse(); Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT)); Factors.push_back(DAG.getConstant(Factor, dl, SVT)); return true; @@ -6639,10 +6660,7 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, // P = inv(D0, 2^W) // 2^W requires W + 1 bits, so we have to extend and then truncate. unsigned W = D.getBitWidth(); - APInt P = D0.zext(W + 1) - .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) - .trunc(W); - assert(!P.isZero() && "No multiplicative inverse!"); // unreachable + APInt P = D0.multiplicativeInverse(); assert((D0 * P).isOne() && "Multiplicative inverse basic check failed."); // Q = floor((2^W - 1) u/ D) @@ -6897,10 +6915,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, // P = inv(D0, 2^W) // 2^W requires W + 1 bits, so we have to extend and then truncate. unsigned W = D.getBitWidth(); - APInt P = D0.zext(W + 1) - .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) - .trunc(W); - assert(!P.isZero() && "No multiplicative inverse!"); // unreachable + APInt P = D0.multiplicativeInverse(); assert((D0 * P).isOne() && "Multiplicative inverse basic check failed."); // A = floor((2^(W - 1) - 1) / D0) & -2^K @@ -7626,7 +7641,7 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, // // For division, we can compute the remainder using the algorithm described // above, subtract it from the dividend to get an exact multiple of Constant. -// Then multiply that extact multiply by the multiplicative inverse modulo +// Then multiply that exact multiply by the multiplicative inverse modulo // (1 << (BitWidth / 2)) to get the quotient. // If Constant is even, we can shift right the dividend and the divisor by the @@ -7761,10 +7776,7 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N, // Multiply by the multiplicative inverse of the divisor modulo // (1 << BitWidth). - APInt Mod = APInt::getSignedMinValue(BitWidth + 1); - APInt MulFactor = Divisor.zext(BitWidth + 1); - MulFactor = MulFactor.multiplicativeInverse(Mod); - MulFactor = MulFactor.trunc(BitWidth); + APInt MulFactor = Divisor.multiplicativeInverse(); SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend, DAG.getConstant(MulFactor, dl, VT)); diff --git a/llvm/lib/CodeGen/ShrinkWrap.cpp b/llvm/lib/CodeGen/ShrinkWrap.cpp index ab57d08..a4b2299 100644 --- a/llvm/lib/CodeGen/ShrinkWrap.cpp +++ b/llvm/lib/CodeGen/ShrinkWrap.cpp @@ -161,9 +161,11 @@ class ShrinkWrap : public MachineFunctionPass { /// Current MachineFunction. MachineFunction *MachineFunc = nullptr; - /// Is `true` for block numbers where we can guarantee no stack access - /// or computation of stack-relative addresses on any CFG path including - /// the block itself. + /// Is `true` for the block numbers where we assume possible stack accesses + /// or computation of stack-relative addresses on any CFG path including the + /// block itself. Is `false` for basic blocks where we can guarantee the + /// opposite. False positives won't lead to incorrect analysis results, + /// therefore this approach is fair. BitVector StackAddressUsedBlockInfo; /// Check if \p MI uses or defines a callee-saved register or @@ -948,6 +950,9 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; + // Initially, conservatively assume that stack addresses can be used in each + // basic block and change the state only for those basic blocks for which we + // were able to prove the opposite. StackAddressUsedBlockInfo.resize(MF.getNumBlockIDs(), true); bool HasCandidate = performShrinkWrapping(RPOT, RS.get()); StackAddressUsedBlockInfo.clear(); |