aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/CodeGen
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp7
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp2
-rw-r--r--llvm/lib/CodeGen/AtomicExpandPass.cpp2
-rw-r--r--llvm/lib/CodeGen/BranchRelaxation.cpp14
-rw-r--r--llvm/lib/CodeGen/CodeGenPrepare.cpp2
-rw-r--r--llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp39
-rw-r--r--llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp6
-rw-r--r--llvm/lib/CodeGen/LiveIntervals.cpp5
-rw-r--r--llvm/lib/CodeGen/MachineCopyPropagation.cpp2
-rw-r--r--llvm/lib/CodeGen/ScheduleDAGInstrs.cpp2
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp85
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp15
12 files changed, 118 insertions, 63 deletions
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 219bbc9..e2af0c5 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -119,6 +119,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/VCSRevision.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
@@ -476,6 +477,7 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
}
bool AsmPrinter::doInitialization(Module &M) {
+ VFS = vfs::getRealFileSystem();
auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
MMI = MMIWP ? &MMIWP->getMMI() : nullptr;
HasSplitStack = false;
@@ -1437,7 +1439,8 @@ getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges,
BrProbEnabled,
MF.hasBBSections() && NumMBBSectionRanges > 1,
static_cast<bool>(BBAddrMapSkipEmitBBEntries),
- HasCalls};
+ HasCalls,
+ false};
}
void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
@@ -1682,7 +1685,7 @@ static ConstantInt *extractNumericCGTypeId(const Function &F) {
return nullptr;
}
-/// Emits .callgraph section.
+/// Emits .llvm.callgraph section.
void AsmPrinter::emitCallGraphSection(const MachineFunction &MF,
FunctionCallGraphInfo &FuncCGInfo) {
if (!MF.getTarget().Options.EmitCallGraphSection)
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index c364ffc..8dd8b9da 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -36,6 +36,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -98,6 +99,7 @@ void AsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
unsigned BufNum = addInlineAsmDiagBuffer(Str, LocMDNode);
SourceMgr &SrcMgr = *MMI->getContext().getInlineSourceManager();
SrcMgr.setIncludeDirs(MCOptions.IASSearchPaths);
+ SrcMgr.setVirtualFileSystem(VFS);
std::unique_ptr<MCAsmParser> Parser(
createMCAsmParser(SrcMgr, OutContext, *OutStreamer, *MAI, BufNum));
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 4931403..53f1cfe2 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -770,7 +770,7 @@ struct PartwordMaskValues {
Value *Inv_Mask = nullptr;
};
-LLVM_ATTRIBUTE_UNUSED
+[[maybe_unused]]
raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
auto PrintObj = [&O](auto *V) {
if (V)
diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp
index 2d50167..fae952e 100644
--- a/llvm/lib/CodeGen/BranchRelaxation.cpp
+++ b/llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -491,6 +491,20 @@ bool BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) {
return true;
}
if (FBB) {
+ // If we get here with a MBB which ends like this:
+ //
+ // bb.1:
+ // successors: %bb.2;
+ // ...
+ // BNE $x1, $x0, %bb.2
+ // PseudoBR %bb.2
+ //
+ // Just remove conditional branch.
+ if (TBB == FBB) {
+ removeBranch(MBB);
+ insertUncondBranch(MBB, TBB);
+ return true;
+ }
// We need to split the basic block here to obtain two long-range
// unconditional branches.
NewBB = createNewBlockAfter(*MBB);
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 4320b1d..9e78ec9 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -819,7 +819,7 @@ void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) {
}
// Verify BFI has been updated correctly by recomputing BFI and comparing them.
-void LLVM_ATTRIBUTE_UNUSED CodeGenPrepare::verifyBFIUpdates(Function &F) {
+[[maybe_unused]] void CodeGenPrepare::verifyBFIUpdates(Function &F) {
DominatorTree NewDT(F);
LoopInfo NewLI(NewDT);
BranchProbabilityInfo NewBPI(F, NewLI, TLInfo);
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index 3812823..04d9309 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -112,7 +112,7 @@ APInt GISelValueTracking::getKnownOnes(Register R) {
return getKnownBits(R).One;
}
-LLVM_ATTRIBUTE_UNUSED static void
+[[maybe_unused]] static void
dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) {
dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" << Depth
<< "] Computed for: " << MI << "[" << Depth << "] Known: 0x"
@@ -2013,6 +2013,43 @@ unsigned GISelValueTracking::computeNumSignBits(Register R,
FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits) - 1;
break;
}
+ case TargetOpcode::G_ADD: {
+ Register Src2 = MI.getOperand(2).getReg();
+ unsigned Src2NumSignBits =
+ computeNumSignBits(Src2, DemandedElts, Depth + 1);
+ if (Src2NumSignBits <= 2)
+ return 1; // Early out.
+
+ Register Src1 = MI.getOperand(1).getReg();
+ unsigned Src1NumSignBits =
+ computeNumSignBits(Src1, DemandedElts, Depth + 1);
+ if (Src1NumSignBits == 1)
+ return 1; // Early Out.
+
+ // Special case decrementing a value (ADD X, -1):
+ KnownBits Known2 = getKnownBits(Src2, DemandedElts, Depth);
+ if (Known2.isAllOnes()) {
+ KnownBits Known1 = getKnownBits(Src1, DemandedElts, Depth);
+ // If the input is known to be 0 or 1, the output is 0/-1, which is all
+ // sign bits set.
+ if ((Known1.Zero | 1).isAllOnes())
+ return TyBits;
+
+ // If we are subtracting one from a positive number, there is no carry
+ // out of the result.
+ if (Known1.isNonNegative()) {
+ FirstAnswer = Src1NumSignBits;
+ break;
+ }
+
+ // Otherwise, we treat this like an ADD.
+ }
+
+ // Add can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits) - 1;
+ break;
+ }
case TargetOpcode::G_FCMP:
case TargetOpcode::G_ICMP: {
bool IsFP = Opcode == TargetOpcode::G_FCMP;
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index cffaf7c..38ec83f 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3292,8 +3292,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
if (TypeIdx != 2)
return UnableToLegalize;
Observer.changingInstr(MI);
- // TODO: Probably should be zext
- widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
Observer.changedInstr(MI);
return Legalized;
}
@@ -3325,8 +3324,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
if (TypeIdx == 2) {
Observer.changingInstr(MI);
- // TODO: Probably should be zext
- widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
+ widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
Observer.changedInstr(MI);
return Legalized;
}
diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp
index 0e38017..d2f2c3e 100644
--- a/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -661,7 +661,10 @@ void LiveIntervals::extendToIndices(LiveRange &LR,
void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill,
SmallVectorImpl<SlotIndex> *EndPoints) {
LiveQueryResult LRQ = LR.Query(Kill);
- VNInfo *VNI = LRQ.valueOutOrDead();
+ // LR may have liveness reachable from early clobber slot, which may be
+ // only live-in instead of live-out of the instruction.
+ // For example, LR =[1r, 3r), Kill = 3e, we have to prune [3e, 3r) of LR.
+ VNInfo *VNI = LRQ.valueOutOrDead() ? LRQ.valueOutOrDead() : LRQ.valueIn();
if (!VNI)
return;
diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index e359831..ea08365 100644
--- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -1257,7 +1257,7 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
Tracker.clear();
}
-static void LLVM_ATTRIBUTE_UNUSED printSpillReloadChain(
+[[maybe_unused]] static void printSpillReloadChain(
DenseMap<MachineInstr *, SmallVector<MachineInstr *>> &SpillChain,
DenseMap<MachineInstr *, SmallVector<MachineInstr *>> &ReloadChain,
MachineInstr *Leader) {
diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index 3268c26..9662511 100644
--- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -1551,7 +1551,7 @@ LLVM_DUMP_METHOD void ILPValue::dump() const {
dbgs() << *this << '\n';
}
-LLVM_ATTRIBUTE_UNUSED
+[[maybe_unused]]
raw_ostream &llvm::operator<<(raw_ostream &OS, const ILPValue &Val) {
Val.print(OS);
return OS;
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e153842..c97300d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -658,13 +658,13 @@ namespace {
bool InexpensiveOnly = false,
std::optional<EVT> OutVT = std::nullopt);
SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
- SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
- SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
- SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
+ SDValue buildRsqrtEstimate(SDValue Op);
+ SDValue buildSqrtEstimate(SDValue Op);
+ SDValue buildSqrtEstimateImpl(SDValue Op, bool Recip);
SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
- SDNodeFlags Flags, bool Reciprocal);
+ bool Reciprocal);
SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
- SDNodeFlags Flags, bool Reciprocal);
+ bool Reciprocal);
SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
bool DemandHighBits = true);
SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
@@ -5044,7 +5044,6 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
unsigned Opc = N->getOpcode();
bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
// X / undef -> undef
// X % undef -> undef
@@ -5076,7 +5075,7 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
// division-by-zero or remainder-by-zero, so assume the divisor is 1.
// TODO: Similarly, if we're zero-extending a boolean divisor, then assume
// it's a 1.
- if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
+ if (isOneOrOneSplat(N1) || (VT.getScalarType() == MVT::i1))
return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
return SDValue();
@@ -17760,7 +17759,6 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
- const TargetOptions &Options = DAG.getTarget().Options;
SDNodeFlags Flags = N->getFlags();
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
@@ -17826,7 +17824,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
bool AllowNewConst = (Level < AfterLegalizeDAG);
// If nnan is enabled, fold lots of things.
- if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
+ if (Flags.hasNoNaNs() && AllowNewConst) {
// If allowed, fold (fadd (fneg x), x) -> 0.0
if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
return DAG.getConstantFP(0.0, DL, VT);
@@ -17975,7 +17973,6 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
EVT VT = N->getValueType(0);
SDLoc DL(N);
- const TargetOptions &Options = DAG.getTarget().Options;
const SDNodeFlags Flags = N->getFlags();
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
@@ -18003,7 +18000,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
if (N0 == N1) {
// (fsub x, x) -> 0.0
- if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
+ if (Flags.hasNoNaNs())
return DAG.getConstantFP(0.0f, DL, VT);
}
@@ -18314,7 +18311,6 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
EVT VT = N->getValueType(0);
SDLoc DL(N);
- const TargetOptions &Options = DAG.getTarget().Options;
// FMA nodes have flags that propagate to the created nodes.
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
MatchContextClass matcher(DAG, TLI, N);
@@ -18340,8 +18336,7 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
}
- if ((Options.NoNaNsFPMath && N->getFlags().hasNoInfs()) ||
- (N->getFlags().hasNoNaNs() && N->getFlags().hasNoInfs())) {
+ if (N->getFlags().hasNoNaNs() && N->getFlags().hasNoInfs()) {
if (N->getFlags().hasNoSignedZeros() ||
(N2CFP && !N2CFP->isExactlyValue(-0.0))) {
if (N0CFP && N0CFP->isZero())
@@ -18591,20 +18586,18 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
// If this FDIV is part of a reciprocal square root, it may be folded
// into a target-specific square root estimate instruction.
if (N1.getOpcode() == ISD::FSQRT) {
- if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
+ if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0)))
return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
} else if (N1.getOpcode() == ISD::FP_EXTEND &&
N1.getOperand(0).getOpcode() == ISD::FSQRT) {
- if (SDValue RV =
- buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
+ if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
AddToWorklist(RV.getNode());
return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
}
} else if (N1.getOpcode() == ISD::FP_ROUND &&
N1.getOperand(0).getOpcode() == ISD::FSQRT) {
- if (SDValue RV =
- buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
+ if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
AddToWorklist(RV.getNode());
return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
@@ -18636,7 +18629,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
SDValue AAZ =
DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
- if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
+ if (SDValue Rsqrt = buildRsqrtEstimate(AAZ))
return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
// Estimate creation failed. Clean up speculatively created nodes.
@@ -18646,7 +18639,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
// We found a FSQRT, so try to make this fold:
// X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
- if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
+ if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0))) {
SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
AddToWorklist(Div.getNode());
return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
@@ -18743,11 +18736,12 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) {
return SDValue();
// FSQRT nodes have flags that propagate to the created nodes.
+ SelectionDAG::FlagInserter FlagInserter(DAG, Flags);
// TODO: If this is N0/sqrt(N0), and we reach this node before trying to
// transform the fdiv, we may produce a sub-optimal estimate sequence
// because the reciprocal calculation may not have to filter out a
// 0.0 input.
- return buildSqrtEstimate(N0, Flags);
+ return buildSqrtEstimate(N0);
}
/// copysign(x, fp_extend(y)) -> copysign(x, y)
@@ -29744,28 +29738,27 @@ SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
/// As a result, we precompute A/2 prior to the iteration loop.
SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
- unsigned Iterations,
- SDNodeFlags Flags, bool Reciprocal) {
+ unsigned Iterations, bool Reciprocal) {
EVT VT = Arg.getValueType();
SDLoc DL(Arg);
SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
// We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
// this entire sequence requires only one FP constant.
- SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
- HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
+ SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg);
+ HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg);
// Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
for (unsigned i = 0; i < Iterations; ++i) {
- SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
- NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
- NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
- Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
+ SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est);
+ NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst);
+ NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst);
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst);
}
// If non-reciprocal square root is requested, multiply the result by Arg.
if (!Reciprocal)
- Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg);
return Est;
}
@@ -29776,8 +29769,7 @@ SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
/// =>
/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
- unsigned Iterations,
- SDNodeFlags Flags, bool Reciprocal) {
+ unsigned Iterations, bool Reciprocal) {
EVT VT = Arg.getValueType();
SDLoc DL(Arg);
SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
@@ -29790,9 +29782,9 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
// Newton iterations for reciprocal square root:
// E = (E * -0.5) * ((A * E) * E + -3.0)
for (unsigned i = 0; i < Iterations; ++i) {
- SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
- SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
- SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
+ SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est);
+ SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est);
+ SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree);
// When calculating a square root at the last iteration build:
// S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
@@ -29800,13 +29792,13 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
SDValue LHS;
if (Reciprocal || (i + 1) < Iterations) {
// RSQRT: LHS = (E * -0.5)
- LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
+ LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf);
} else {
// SQRT: LHS = (A * E) * -0.5
- LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
+ LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf);
}
- Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
+ Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS);
}
return Est;
@@ -29815,8 +29807,7 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
/// Op can be zero.
-SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
- bool Reciprocal) {
+SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, bool Reciprocal) {
if (LegalDAG)
return SDValue();
@@ -29844,8 +29835,8 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
if (Iterations > 0)
Est = UseOneConstNR
- ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
- : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
+ ? buildSqrtNROneConst(Op, Est, Iterations, Reciprocal)
+ : buildSqrtNRTwoConst(Op, Est, Iterations, Reciprocal);
if (!Reciprocal) {
SDLoc DL(Op);
// Try the target specific test first.
@@ -29863,12 +29854,12 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
return SDValue();
}
-SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
- return buildSqrtEstimateImpl(Op, Flags, true);
+SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op) {
+ return buildSqrtEstimateImpl(Op, true);
}
-SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
- return buildSqrtEstimateImpl(Op, Flags, false);
+SDValue DAGCombiner::buildSqrtEstimate(SDValue Op) {
+ return buildSqrtEstimateImpl(Op, false);
}
/// Return true if there is any possibility that the two addresses overlap.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index c9aeef7..90edaf3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5063,8 +5063,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
break;
case ISD::ADD:
case ISD::ADDC:
- // Add can have at most one carry bit. Thus we know that the output
- // is, at worst, one more bit than the inputs.
+ // TODO: Move Operand 1 check before Operand 0 check
Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
if (Tmp == 1) return 1; // Early out.
@@ -5088,6 +5087,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
if (Tmp2 == 1) return 1; // Early out.
+
+ // Add can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
return std::min(Tmp, Tmp2) - 1;
case ISD::SUB:
Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
@@ -6403,8 +6405,9 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
if (VT.isScalableVector())
return SDValue();
- // A CONCAT_VECTOR with all UNDEF/BUILD_VECTOR operands can be
- // simplified to one big BUILD_VECTOR.
+ // A CONCAT_VECTOR of scalar sources, such as UNDEF, BUILD_VECTOR and
+ // single-element INSERT_VECTOR_ELT operands can be simplified to one big
+ // BUILD_VECTOR.
// FIXME: Add support for SCALAR_TO_VECTOR as well.
EVT SVT = VT.getScalarType();
SmallVector<SDValue, 16> Elts;
@@ -6414,6 +6417,10 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
Elts.append(OpVT.getVectorNumElements(), DAG.getUNDEF(SVT));
else if (Op.getOpcode() == ISD::BUILD_VECTOR)
Elts.append(Op->op_begin(), Op->op_end());
+ else if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
+ OpVT.getVectorNumElements() == 1 &&
+ isNullConstant(Op.getOperand(2)))
+ Elts.push_back(Op.getOperand(1));
else
return SDValue();
}