Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp  |  6
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp         | 15
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp    |  9
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp     | 87
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp     | 13
-rw-r--r--  llvm/lib/CodeGen/SwitchLoweringUtils.cpp          | 22
-rw-r--r--  llvm/lib/CodeGen/TargetLoweringBase.cpp           | 21
7 files changed, 148 insertions(+), 25 deletions(-)
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 518121e..751d373 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -1793,9 +1793,13 @@ void DwarfCompileUnit::createBaseTypeDIEs() {
"_" + Twine(Btr.BitSize)).toStringRef(Str));
addUInt(Die, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, Btr.Encoding);
// Round up to smallest number of bytes that contains this number of bits.
+ // ExprRefedBaseTypes is populated with types referenced by
+ // DW_OP_LLVM_convert operations in location expressions. These are often
+ // byte-sized, but a common counter-example is the 1-bit conversion
+ // from `i1` types. TODO: Should these use DW_AT_bit_size? See
+ // DwarfUnit::constructTypeDIE.
addUInt(Die, dwarf::DW_AT_byte_size, std::nullopt,
divideCeil(Btr.BitSize, 8));
-
Btr.Die = &Die;
}
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index e40fb76..555c56f 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -766,8 +766,19 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIBasicType *BTy) {
addUInt(Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
BTy->getEncoding());
- uint64_t Size = BTy->getSizeInBits() >> 3;
- addUInt(Buffer, dwarf::DW_AT_byte_size, std::nullopt, Size);
+ uint64_t SizeInBytes = divideCeil(BTy->getSizeInBits(), 8);
+ addUInt(Buffer, dwarf::DW_AT_byte_size, std::nullopt, SizeInBytes);
+ if (BTy->getTag() == dwarf::Tag::DW_TAG_base_type) {
+ // DW_TAG_base_type:
+ // If the value of an object of the given type does not fully occupy the
+ // storage described by a byte size attribute, the base type entry may also
+ // have a DW_AT_bit_size [...] attribute.
+ // TODO: Do big endian targets need DW_AT_data_bit_offset? See discussion in
+ // pull request #164372.
+ if (uint64_t DataSizeInBits = BTy->getDataSizeInBits();
+ DataSizeInBits && DataSizeInBits != SizeInBytes * 8)
+ addUInt(Buffer, dwarf::DW_AT_bit_size, std::nullopt, DataSizeInBits);
+ }
if (BTy->isBigEndian())
addUInt(Buffer, dwarf::DW_AT_endianity, std::nullopt, dwarf::DW_END_big);
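
For reference, a minimal standalone sketch (not taken from the patch) of how the rounded byte size and the optional bit size interact for a 1-bit type such as `i1`; the real code uses llvm::divideCeil and DIBasicType::getDataSizeInBits(), which are only mimicked here:

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t SizeInBits = 1;                     // e.g. a 1-bit base type from i1
  uint64_t SizeInBytes = (SizeInBits + 7) / 8; // same result as divideCeil(SizeInBits, 8)
  std::printf("DW_AT_byte_size = %llu\n", (unsigned long long)SizeInBytes);
  // The patch emits DW_AT_bit_size only when the data size (taken from
  // getDataSizeInBits() in the real code) does not fill the rounded-up storage.
  if (SizeInBits != SizeInBytes * 8)
    std::printf("DW_AT_bit_size = %llu\n", (unsigned long long)SizeInBits);
  return 0;
}

For an ordinary 32-bit integer the condition is false and only DW_AT_byte_size = 4 is emitted.
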
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 1f10478..9ace7d6 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -4425,6 +4425,7 @@ void CombinerHelper::applyBuildFnNoErase(
}
bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI,
+ bool AllowScalarConstants,
BuildFnTy &MatchInfo) const {
assert(MI.getOpcode() == TargetOpcode::G_OR);
@@ -4444,31 +4445,29 @@ bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI,
// Given constants C0 and C1 such that C0 + C1 is bit-width:
// (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
- int64_t CstShlAmt, CstLShrAmt;
+ int64_t CstShlAmt = 0, CstLShrAmt;
if (mi_match(ShlAmt, MRI, m_ICstOrSplat(CstShlAmt)) &&
mi_match(LShrAmt, MRI, m_ICstOrSplat(CstLShrAmt)) &&
CstShlAmt + CstLShrAmt == BitWidth) {
FshOpc = TargetOpcode::G_FSHR;
Amt = LShrAmt;
-
} else if (mi_match(LShrAmt, MRI,
m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) &&
ShlAmt == Amt) {
// (or (shl x, amt), (lshr y, (sub bw, amt))) -> (fshl x, y, amt)
FshOpc = TargetOpcode::G_FSHL;
-
} else if (mi_match(ShlAmt, MRI,
m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) &&
LShrAmt == Amt) {
// (or (shl x, (sub bw, amt)), (lshr y, amt)) -> (fshr x, y, amt)
FshOpc = TargetOpcode::G_FSHR;
-
} else {
return false;
}
LLT AmtTy = MRI.getType(Amt);
- if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}))
+ if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}) &&
+ (!AllowScalarConstants || CstShlAmt == 0 || !Ty.isScalar()))
return false;
MatchInfo = [=](MachineIRBuilder &B) {
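
A standalone sketch of the scalar identity the pattern above relies on (illustrative only, not GlobalISel code): with constants C0 + C1 equal to the bit width, (x << C0) | (y >> C1) is exactly a funnel shift, and with x == y it degenerates to a rotate.

#include <cstdint>
#include <cstdio>

// fshr(Hi, Lo, Amt): concatenate Hi:Lo and keep the low 32 bits after shifting
// right by Amt (Amt assumed to be in 1..31 here).
static uint32_t fshr32(uint32_t Hi, uint32_t Lo, unsigned Amt) {
  return (Hi << (32u - Amt)) | (Lo >> Amt);
}

int main() {
  uint32_t X = 0x12345678, Y = 0x9ABCDEF0;
  unsigned C0 = 8, C1 = 24;                 // C0 + C1 == bit width (32)
  uint32_t Or = (X << C0) | (Y >> C1);      // the G_OR pattern being matched
  std::printf("%08x %08x\n", (unsigned)Or, (unsigned)fshr32(X, Y, C1)); // both print 3456789a
  return 0;
}
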
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index cf221bb..1ef5dc2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -23506,6 +23506,93 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
// inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
return DAG.getSplat(VT, DL, InVal);
+
+ // Extend this type to be byte-addressable
+ EVT OldVT = VT;
+ EVT EltVT = VT.getVectorElementType();
+ bool IsByteSized = EltVT.isByteSized();
+ if (!IsByteSized) {
+ EltVT =
+ EltVT.changeTypeToInteger().getRoundIntegerType(*DAG.getContext());
+ VT = VT.changeElementType(EltVT);
+ }
+
+ // Check if this operation will be handled the default way for its type.
+ auto IsTypeDefaultHandled = [this](EVT VT) {
+ return TLI.getTypeAction(*DAG.getContext(), VT) ==
+ TargetLowering::TypeSplitVector ||
+ TLI.isOperationExpand(ISD::INSERT_VECTOR_ELT, VT);
+ };
+
+ // Check if this operation is illegal and will be handled the default way,
+ // even after extending the type to be byte-addressable.
+ if (IsTypeDefaultHandled(OldVT) && IsTypeDefaultHandled(VT)) {
+ // For each dynamic insertelt, the default way will save the vector to
+ // the stack, store at an offset, and load the modified vector. This can
+ // dramatically increase code size if we have a chain of insertelts on a
+ // large vector: requiring O(V*C) stores/loads where V = length of
+ // vector and C is length of chain. If each insertelt is only fed into the
+ // next, the vector is write-only across this chain, and we can just
+ // save once before the chain and load after in O(V + C) operations.
+ SmallVector<SDNode *> Seq{N};
+ unsigned NumDynamic = 1;
+ while (true) {
+ SDValue InVec = Seq.back()->getOperand(0);
+ if (InVec.getOpcode() != ISD::INSERT_VECTOR_ELT)
+ break;
+ Seq.push_back(InVec.getNode());
+ NumDynamic += !isa<ConstantSDNode>(InVec.getOperand(2));
+ }
+
+ // It always and only makes sense to lower this sequence when we have more
+ // than one dynamic insertelt, since we will not have more than V constant
+ // insertelts, so we will be reducing the total number of stores+loads.
+ if (NumDynamic > 1) {
+ // In cases where the vector is illegal it will be broken down into
+ // parts and stored in parts - we should use the alignment for the
+ // smallest part.
+ Align SmallestAlign = DAG.getReducedAlign(VT, /*UseABI=*/false);
+ SDValue StackPtr =
+ DAG.CreateStackTemporary(VT.getStoreSize(), SmallestAlign);
+ auto &MF = DAG.getMachineFunction();
+ int FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
+
+ // Save the vector to the stack
+ SDValue InVec = Seq.back()->getOperand(0);
+ if (!IsByteSized)
+ InVec = DAG.getNode(ISD::ANY_EXTEND, DL, VT, InVec);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), DL, InVec, StackPtr,
+ PtrInfo, SmallestAlign);
+
+ // Lower each dynamic insertelt to a store
+ for (SDNode *N : reverse(Seq)) {
+ SDValue Elmnt = N->getOperand(1);
+ SDValue Index = N->getOperand(2);
+
+ // Check if we have to extend the element type
+ if (!IsByteSized && Elmnt.getValueType().bitsLT(EltVT))
+ Elmnt = DAG.getNode(ISD::ANY_EXTEND, DL, EltVT, Elmnt);
+
+ // Store the new element. This may be larger than the vector element
+ // type, so use a truncating store.
+ SDValue EltPtr =
+ TLI.getVectorElementPointer(DAG, StackPtr, VT, Index);
+ EVT EltVT = Elmnt.getValueType();
+ Store = DAG.getTruncStore(
+ Store, DL, Elmnt, EltPtr, MachinePointerInfo::getUnknownStack(MF),
+ EltVT,
+ commonAlignment(SmallestAlign, EltVT.getFixedSizeInBits() / 8));
+ }
+
+ // Load the saved vector from the stack
+ SDValue Load =
+ DAG.getLoad(VT, DL, Store, StackPtr, PtrInfo, SmallestAlign);
+ SDValue LoadV = Load.getValue(0);
+ return IsByteSized ? LoadV : DAG.getAnyExtOrTrunc(LoadV, DL, OldVT);
+ }
+ }
+
return SDValue();
}
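
A simplified standalone model of the rewrite described in the comments above (std::vector stands in for the stack temporary; this is not SelectionDAG code): the vector is spilled once, every dynamic insert becomes a single element store, and the result is reloaded once, i.e. O(V + C) memory operations instead of O(V * C).

#include <cstddef>
#include <utility>
#include <vector>

std::vector<int> InsertChain(std::vector<int> Vec,
                             const std::vector<std::pair<std::size_t, int>> &Inserts) {
  // "Spill" once: corresponds to the single store of the source vector to the
  // stack temporary created above.
  std::vector<int> Stack = std::move(Vec);
  // Each dynamic insertelt in the chain becomes one element store at a
  // variable index (the truncating stores in the loop above).
  for (const auto &[Index, Value] : Inserts)
    Stack[Index % Stack.size()] = Value;
  // "Reload" once: corresponds to the final load of the modified vector.
  return Stack;
}
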
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 5fb7e63..431a810 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2400,10 +2400,11 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
Results.push_back(Rem);
}
-/// Return true if sincos libcall is available.
+/// Return true if sincos or __sincos_stret libcall is available.
static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) {
- RTLIB::Libcall LC = RTLIB::getSINCOS(Node->getSimpleValueType(0).SimpleTy);
- return TLI.getLibcallName(LC) != nullptr;
+ MVT::SimpleValueType VT = Node->getSimpleValueType(0).SimpleTy;
+ return TLI.getLibcallImpl(RTLIB::getSINCOS(VT)) != RTLIB::Unsupported ||
+ TLI.getLibcallImpl(RTLIB::getSINCOS_STRET(VT)) != RTLIB::Unsupported;
}
/// Only issue sincos libcall if both sin and cos are needed.
@@ -3752,9 +3753,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
EVT VT = Node->getValueType(0);
// Turn fsin / fcos into ISD::FSINCOS node if there are a pair of fsin /
// fcos which share the same operand and both are used.
- if ((TLI.isOperationLegalOrCustom(ISD::FSINCOS, VT) ||
- isSinCosLibcallAvailable(Node, TLI))
- && useSinCos(Node)) {
+ if ((TLI.isOperationLegal(ISD::FSINCOS, VT) ||
+ isSinCosLibcallAvailable(Node, TLI)) &&
+ useSinCos(Node)) {
SDVTList VTs = DAG.getVTList(VT, VT);
Tmp1 = DAG.getNode(ISD::FSINCOS, dl, VTs, Node->getOperand(0));
if (Node->getOpcode() == ISD::FCOS)
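
The source-level shape this expansion looks for, as a rough illustration (plain C++, unrelated to the legalizer itself): both sin and cos of the same operand are live, so the two nodes can be folded into one ISD::FSINCOS and ultimately one sincos (or __sincos_stret) libcall.

#include <cmath>

double SumSinCos(double X) {
  double S = std::sin(X); // becomes an fsin node
  double C = std::cos(X); // fcos with the same operand, both results used
  return S + C;           // eligible for a single FSINCOS / sincos call
}
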
diff --git a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
index 038c499..3fa8243 100644
--- a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
+++ b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
@@ -198,7 +198,6 @@ bool SwitchCG::SwitchLowering::buildJumpTable(const CaseClusterVector &Clusters,
assert(First <= Last);
auto Prob = BranchProbability::getZero();
- unsigned NumCmps = 0;
std::vector<MachineBasicBlock*> Table;
DenseMap<MachineBasicBlock*, BranchProbability> JTProbs;
@@ -206,12 +205,16 @@ bool SwitchCG::SwitchLowering::buildJumpTable(const CaseClusterVector &Clusters,
for (unsigned I = First; I <= Last; ++I)
JTProbs[Clusters[I].MBB] = BranchProbability::getZero();
+ DenseMap<const BasicBlock *, unsigned int> DestMap;
for (unsigned I = First; I <= Last; ++I) {
assert(Clusters[I].Kind == CC_Range);
Prob += Clusters[I].Prob;
const APInt &Low = Clusters[I].Low->getValue();
const APInt &High = Clusters[I].High->getValue();
- NumCmps += (Low == High) ? 1 : 2;
+ unsigned int NumCmp = (Low == High) ? 1 : 2;
+ const BasicBlock *BB = Clusters[I].MBB->getBasicBlock();
+ DestMap[BB] += NumCmp;
+
if (I != First) {
// Fill the gap between this and the previous cluster.
const APInt &PreviousHigh = Clusters[I - 1].High->getValue();
@@ -226,9 +229,7 @@ bool SwitchCG::SwitchLowering::buildJumpTable(const CaseClusterVector &Clusters,
JTProbs[Clusters[I].MBB] += Clusters[I].Prob;
}
- unsigned NumDests = JTProbs.size();
- if (TLI->isSuitableForBitTests(NumDests, NumCmps,
- Clusters[First].Low->getValue(),
+ if (TLI->isSuitableForBitTests(DestMap, Clusters[First].Low->getValue(),
Clusters[Last].High->getValue(), *DL)) {
// Clusters[First..Last] should be lowered as bit tests instead.
return false;
@@ -372,20 +373,19 @@ bool SwitchCG::SwitchLowering::buildBitTests(CaseClusterVector &Clusters,
if (First == Last)
return false;
- BitVector Dests(FuncInfo.MF->getNumBlockIDs());
- unsigned NumCmps = 0;
+ DenseMap<const BasicBlock *, unsigned int> DestMap;
for (int64_t I = First; I <= Last; ++I) {
assert(Clusters[I].Kind == CC_Range);
- Dests.set(Clusters[I].MBB->getNumber());
- NumCmps += (Clusters[I].Low == Clusters[I].High) ? 1 : 2;
+ unsigned NumCmp = (Clusters[I].Low == Clusters[I].High) ? 1 : 2;
+ const BasicBlock *BB = Clusters[I].MBB->getBasicBlock();
+ DestMap[BB] += NumCmp;
}
- unsigned NumDests = Dests.count();
APInt Low = Clusters[First].Low->getValue();
APInt High = Clusters[Last].High->getValue();
assert(Low.slt(High));
- if (!TLI->isSuitableForBitTests(NumDests, NumCmps, Low, High, *DL))
+ if (!TLI->isSuitableForBitTests(DestMap, Low, High, *DL))
return false;
APInt LowBound;
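
A standalone model of the new per-destination bookkeeping (std::map and std::string stand in for DenseMap and the destination blocks; the cluster values are made up for illustration):

#include <map>
#include <string>
#include <vector>

struct Cluster {
  long Low, High;
  std::string DestBB; // stands in for Clusters[I].MBB->getBasicBlock()
};

std::map<std::string, unsigned> BuildDestMap(const std::vector<Cluster> &Clusters) {
  std::map<std::string, unsigned> DestMap;
  for (const Cluster &C : Clusters)
    DestMap[C.DestBB] += (C.Low == C.High) ? 1 : 2; // 1 cmp for a value, 2 for a range
  return DestMap;
}

For clusters {1 -> A, [3,5] -> B, 7 -> A} this yields {A: 2, B: 2}, so isSuitableForBitTests can now see how the comparisons are distributed across destinations rather than only the former aggregate NumDests/NumCmps counters.
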
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 59798b3..b3535eac 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
@@ -90,6 +91,11 @@ static cl::opt<unsigned> OptsizeJumpTableDensity(
cl::desc("Minimum density for building a jump table in "
"an optsize function"));
+static cl::opt<unsigned> MinimumBitTestCmpsOverride(
+ "min-bit-test-cmps", cl::init(2), cl::Hidden,
+ cl::desc("Set minimum of largest number of comparisons "
+ "to use bit test for switch."));
+
// FIXME: This option is only to test if the strict fp operation processed
// correctly by preventing mutating strict fp operation to normal fp operation
// during development. When the backend supports strict float operation, this
@@ -428,6 +434,11 @@ RTLIB::Libcall RTLIB::getSINCOSPI(EVT RetVT) {
SINCOSPI_F128, SINCOSPI_PPCF128);
}
+RTLIB::Libcall RTLIB::getSINCOS_STRET(EVT RetVT) {
+ return getFPLibCall(RetVT, SINCOS_STRET_F32, SINCOS_STRET_F64,
+ UNKNOWN_LIBCALL, UNKNOWN_LIBCALL, UNKNOWN_LIBCALL);
+}
+
RTLIB::Libcall RTLIB::getMODF(EVT RetVT) {
return getFPLibCall(RetVT, MODF_F32, MODF_F64, MODF_F80, MODF_F128,
MODF_PPCF128);
@@ -719,6 +730,8 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm)
MinCmpXchgSizeInBits = 0;
SupportsUnalignedAtomics = false;
+
+ MinimumBitTestCmps = MinimumBitTestCmpsOverride;
}
// Define the virtual destructor out-of-line to act as a key method to anchor
@@ -2129,6 +2142,14 @@ bool TargetLoweringBase::isJumpTableRelative() const {
return getTargetMachine().isPositionIndependent();
}
+unsigned TargetLoweringBase::getMinimumBitTestCmps() const {
+ return MinimumBitTestCmps;
+}
+
+void TargetLoweringBase::setMinimumBitTestCmps(unsigned Val) {
+ MinimumBitTestCmps = Val;
+}
+
Align TargetLoweringBase::getPrefLoopAlignment(MachineLoop *ML) const {
if (TM.Options.LoopAlignment)
return Align(TM.Options.LoopAlignment);
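
As a rough sketch of how the new hook might be used (the target class below is hypothetical; only setMinimumBitTestCmps and its default of 2 come from the patch):

#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"

// Hypothetical target that only wants bit tests when some destination absorbs
// at least three comparisons.
class MyTargetLowering : public llvm::TargetLowering {
public:
  explicit MyTargetLowering(const llvm::TargetMachine &TM)
      : llvm::TargetLowering(TM) {
    setMinimumBitTestCmps(3); // overrides the default of 2 / -min-bit-test-cmps
  }
};

The same threshold can presumably also be changed globally through the min-bit-test-cmps option registered above.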