Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp  |  6
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp         | 15
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp    |  9
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp     | 87
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp     | 13
-rw-r--r--  llvm/lib/CodeGen/SwitchLoweringUtils.cpp          | 22
-rw-r--r--  llvm/lib/CodeGen/TargetLoweringBase.cpp           | 21
7 files changed, 148 insertions, 25 deletions
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 518121e..751d373 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -1793,9 +1793,13 @@ void DwarfCompileUnit::createBaseTypeDIEs() {
                         "_" + Twine(Btr.BitSize)).toStringRef(Str));
     addUInt(Die, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, Btr.Encoding);
     // Round up to smallest number of bytes that contains this number of bits.
+    // ExprRefedBaseTypes is populated with types referenced by
+    // DW_OP_LLVM_convert operations in location expressions. These are often
+    // byte-sized, but one common counter-example is 1-bit sized conversions
+    // from `i1` types. TODO: Should these use DW_AT_bit_size? See
+    // DwarfUnit::constructTypeDIE.
     addUInt(Die, dwarf::DW_AT_byte_size, std::nullopt,
             divideCeil(Btr.BitSize, 8));
-
     Btr.Die = &Die;
   }
 }
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index e40fb76..555c56f 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -766,8 +766,19 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIBasicType *BTy) {
     addUInt(Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
             BTy->getEncoding());
 
-  uint64_t Size = BTy->getSizeInBits() >> 3;
-  addUInt(Buffer, dwarf::DW_AT_byte_size, std::nullopt, Size);
+  uint64_t SizeInBytes = divideCeil(BTy->getSizeInBits(), 8);
+  addUInt(Buffer, dwarf::DW_AT_byte_size, std::nullopt, SizeInBytes);
+  if (BTy->getTag() == dwarf::Tag::DW_TAG_base_type) {
+    // DW_TAG_base_type:
+    // If the value of an object of the given type does not fully occupy the
+    // storage described by a byte size attribute, the base type entry may also
+    // have a DW_AT_bit_size [...] attribute.
+    // TODO: Do big endian targets need DW_AT_data_bit_offset? See discussion in
+    // pull request #164372.
+    if (uint64_t DataSizeInBits = BTy->getDataSizeInBits();
+        DataSizeInBits && DataSizeInBits != SizeInBytes * 8)
+      addUInt(Buffer, dwarf::DW_AT_bit_size, std::nullopt, DataSizeInBits);
+  }
 
   if (BTy->isBigEndian())
     addUInt(Buffer, dwarf::DW_AT_endianity, std::nullopt, dwarf::DW_END_big);
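Note: as a rough, standalone illustration of the DwarfUnit change above (this is
not part of the patch; emitBaseTypeSizes is a hypothetical stand-in for the
addUInt calls), a 1-bit base type now gets DW_AT_byte_size = 1 together with
DW_AT_bit_size = 1, while byte-sized types keep emitting only DW_AT_byte_size:

    #include <cstdint>
    #include <cstdio>

    // Round the storage size up to whole bytes (divideCeil(SizeInBits, 8)) and
    // report a separate bit size when the value does not fill that storage.
    static void emitBaseTypeSizes(uint64_t SizeInBits, uint64_t DataSizeInBits) {
      uint64_t SizeInBytes = (SizeInBits + 7) / 8;
      std::printf("DW_AT_byte_size = %llu\n", (unsigned long long)SizeInBytes);
      if (DataSizeInBits && DataSizeInBits != SizeInBytes * 8)
        std::printf("DW_AT_bit_size = %llu\n", (unsigned long long)DataSizeInBits);
    }

    int main() {
      emitBaseTypeSizes(1, 1);   // i1:  byte_size 1, bit_size 1
      emitBaseTypeSizes(32, 32); // i32: byte_size 4, no bit_size
    }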
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 1f10478..9ace7d6 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -4425,6 +4425,7 @@ void CombinerHelper::applyBuildFnNoErase(
 }
 
 bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI,
+                                               bool AllowScalarConstants,
                                                BuildFnTy &MatchInfo) const {
   assert(MI.getOpcode() == TargetOpcode::G_OR);
 
@@ -4444,31 +4445,29 @@ bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI,
 
   // Given constants C0 and C1 such that C0 + C1 is bit-width:
   // (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
-  int64_t CstShlAmt, CstLShrAmt;
+  int64_t CstShlAmt = 0, CstLShrAmt;
   if (mi_match(ShlAmt, MRI, m_ICstOrSplat(CstShlAmt)) &&
       mi_match(LShrAmt, MRI, m_ICstOrSplat(CstLShrAmt)) &&
       CstShlAmt + CstLShrAmt == BitWidth) {
     FshOpc = TargetOpcode::G_FSHR;
     Amt = LShrAmt;
-
   } else if (mi_match(LShrAmt, MRI,
                       m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) &&
              ShlAmt == Amt) {
     // (or (shl x, amt), (lshr y, (sub bw, amt))) -> (fshl x, y, amt)
     FshOpc = TargetOpcode::G_FSHL;
-
   } else if (mi_match(ShlAmt, MRI,
                       m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) &&
              LShrAmt == Amt) {
     // (or (shl x, (sub bw, amt)), (lshr y, amt)) -> (fshr x, y, amt)
     FshOpc = TargetOpcode::G_FSHR;
-
   } else {
     return false;
   }
 
   LLT AmtTy = MRI.getType(Amt);
-  if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}))
+  if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}) &&
+      (!AllowScalarConstants || CstShlAmt == 0 || !Ty.isScalar()))
     return false;
 
   MatchInfo = [=](MachineIRBuilder &B) {
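Note: the CombinerHelper change above keys off the identity stated in the
comment, "(or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0)" when C0 + C1 is
the bit width. A small scalar model of that identity for 32-bit values
(illustrative only, not GlobalISel code):

    #include <cassert>
    #include <cstdint>

    // fshl(Hi, Lo, Amt) on 32-bit values: shift the 64-bit concatenation Hi:Lo
    // left by Amt and keep the upper 32 bits.
    static uint32_t fshl32(uint32_t Hi, uint32_t Lo, unsigned Amt) {
      uint64_t Concat = ((uint64_t)Hi << 32) | Lo;
      return (uint32_t)(Concat >> (32 - Amt));
    }

    int main() {
      uint32_t X = 0x12345678, Y = 0x9ABCDEF0;
      for (unsigned C0 = 1; C0 < 32; ++C0) {
        unsigned C1 = 32 - C0;
        // (x << C0) | (y >> C1) is exactly a funnel shift left of x:y by C0.
        assert(((X << C0) | (Y >> C1)) == fshl32(X, Y, C0));
      }
    }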
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index cf221bb..1ef5dc2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -23506,6 +23506,93 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
   // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
   if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
     return DAG.getSplat(VT, DL, InVal);
+
+  // Extend this type to be byte-addressable
+  EVT OldVT = VT;
+  EVT EltVT = VT.getVectorElementType();
+  bool IsByteSized = EltVT.isByteSized();
+  if (!IsByteSized) {
+    EltVT =
+        EltVT.changeTypeToInteger().getRoundIntegerType(*DAG.getContext());
+    VT = VT.changeElementType(EltVT);
+  }
+
+  // Check if this operation will be handled the default way for its type.
+  auto IsTypeDefaultHandled = [this](EVT VT) {
+    return TLI.getTypeAction(*DAG.getContext(), VT) ==
+               TargetLowering::TypeSplitVector ||
+           TLI.isOperationExpand(ISD::INSERT_VECTOR_ELT, VT);
+  };
+
+  // Check if this operation is illegal and will be handled the default way,
+  // even after extending the type to be byte-addressable.
+  if (IsTypeDefaultHandled(OldVT) && IsTypeDefaultHandled(VT)) {
+    // For each dynamic insertelt, the default way will save the vector to
+    // the stack, store at an offset, and load the modified vector. This can
+    // dramatically increase code size if we have a chain of insertelts on a
+    // large vector: requiring O(V*C) stores/loads where V = length of
+    // vector and C is length of chain. If each insertelt is only fed into the
+    // next, the vector is write-only across this chain, and we can just
+    // save once before the chain and load after in O(V + C) operations.
+    SmallVector<SDNode *> Seq{N};
+    unsigned NumDynamic = 1;
+    while (true) {
+      SDValue InVec = Seq.back()->getOperand(0);
+      if (InVec.getOpcode() != ISD::INSERT_VECTOR_ELT)
+        break;
+      Seq.push_back(InVec.getNode());
+      NumDynamic += !isa<ConstantSDNode>(InVec.getOperand(2));
+    }
+
+    // It always and only makes sense to lower this sequence when we have more
+    // than one dynamic insertelt, since we will not have more than V constant
+    // insertelts, so we will be reducing the total number of stores+loads.
+    if (NumDynamic > 1) {
+      // In cases where the vector is illegal it will be broken down into
+      // parts and stored in parts - we should use the alignment for the
+      // smallest part.
+      Align SmallestAlign = DAG.getReducedAlign(VT, /*UseABI=*/false);
+      SDValue StackPtr =
+          DAG.CreateStackTemporary(VT.getStoreSize(), SmallestAlign);
+      auto &MF = DAG.getMachineFunction();
+      int FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+      auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
+
+      // Save the vector to the stack
+      SDValue InVec = Seq.back()->getOperand(0);
+      if (!IsByteSized)
+        InVec = DAG.getNode(ISD::ANY_EXTEND, DL, VT, InVec);
+      SDValue Store = DAG.getStore(DAG.getEntryNode(), DL, InVec, StackPtr,
+                                   PtrInfo, SmallestAlign);
+
+      // Lower each dynamic insertelt to a store
+      for (SDNode *N : reverse(Seq)) {
+        SDValue Elmnt = N->getOperand(1);
+        SDValue Index = N->getOperand(2);
+
+        // Check if we have to extend the element type
+        if (!IsByteSized && Elmnt.getValueType().bitsLT(EltVT))
+          Elmnt = DAG.getNode(ISD::ANY_EXTEND, DL, EltVT, Elmnt);
+
+        // Store the new element. This may be larger than the vector element
+        // type, so use a truncating store.
+        SDValue EltPtr =
+            TLI.getVectorElementPointer(DAG, StackPtr, VT, Index);
+        EVT EltVT = Elmnt.getValueType();
+        Store = DAG.getTruncStore(
+            Store, DL, Elmnt, EltPtr, MachinePointerInfo::getUnknownStack(MF),
+            EltVT,
+            commonAlignment(SmallestAlign, EltVT.getFixedSizeInBits() / 8));
+      }
+
+      // Load the saved vector from the stack
+      SDValue Load =
+          DAG.getLoad(VT, DL, Store, StackPtr, PtrInfo, SmallestAlign);
+      SDValue LoadV = Load.getValue(0);
+      return IsByteSized ? LoadV : DAG.getAnyExtOrTrunc(LoadV, DL, OldVT);
+    }
+  }
+
   return SDValue();
 }
 
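Note: a back-of-the-envelope model of the cost argument in the DAGCombiner
comment above (illustrative arithmetic only, not LLVM code). The default
expansion of each dynamic insertelement spills the whole vector, stores one
element, and reloads the vector; lowering the whole chain through a single
stack slot needs one spill, one store per insert, and one reload:

    #include <cstdio>

    int main() {
      unsigned V = 64, C = 8; // V = vector length, C = chain length
      unsigned PerInsert = V /*spill*/ + 1 /*element store*/ + V /*reload*/;
      unsigned DefaultOps = C * PerInsert;                             // O(V*C)
      unsigned ChainedOps = V /*spill*/ + C /*stores*/ + V /*reload*/; // O(V+C)
      std::printf("default: %u memory ops, chained: %u memory ops\n",
                  DefaultOps, ChainedOps);
    }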
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 5fb7e63..431a810 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2400,10 +2400,11 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
   Results.push_back(Rem);
 }
 
-/// Return true if sincos libcall is available.
+/// Return true if sincos or __sincos_stret libcall is available.
 static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) {
-  RTLIB::Libcall LC = RTLIB::getSINCOS(Node->getSimpleValueType(0).SimpleTy);
-  return TLI.getLibcallName(LC) != nullptr;
+  MVT::SimpleValueType VT = Node->getSimpleValueType(0).SimpleTy;
+  return TLI.getLibcallImpl(RTLIB::getSINCOS(VT)) != RTLIB::Unsupported ||
+         TLI.getLibcallImpl(RTLIB::getSINCOS_STRET(VT)) != RTLIB::Unsupported;
 }
 
 /// Only issue sincos libcall if both sin and cos are needed.
@@ -3752,9 +3753,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     EVT VT = Node->getValueType(0);
     // Turn fsin / fcos into ISD::FSINCOS node if there are a pair of fsin /
    // fcos which share the same operand and both are used.
-    if ((TLI.isOperationLegalOrCustom(ISD::FSINCOS, VT) ||
-         isSinCosLibcallAvailable(Node, TLI))
-        && useSinCos(Node)) {
+    if ((TLI.isOperationLegal(ISD::FSINCOS, VT) ||
+         isSinCosLibcallAvailable(Node, TLI)) &&
+        useSinCos(Node)) {
       SDVTList VTs = DAG.getVTList(VT, VT);
       Tmp1 = DAG.getNode(ISD::FSINCOS, dl, VTs, Node->getOperand(0));
       if (Node->getOpcode() == ISD::FCOS)
diff --git a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
index 038c499..3fa8243 100644
--- a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
+++ b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
@@ -198,7 +198,6 @@ bool SwitchCG::SwitchLowering::buildJumpTable(const CaseClusterVector &Clusters,
   assert(First <= Last);
 
   auto Prob = BranchProbability::getZero();
-  unsigned NumCmps = 0;
   std::vector<MachineBasicBlock*> Table;
   DenseMap<MachineBasicBlock*, BranchProbability> JTProbs;
 
@@ -206,12 +205,16 @@ bool SwitchCG::SwitchLowering::buildJumpTable(const CaseClusterVector &Clusters,
   for (unsigned I = First; I <= Last; ++I)
     JTProbs[Clusters[I].MBB] = BranchProbability::getZero();
 
+  DenseMap<const BasicBlock *, unsigned int> DestMap;
   for (unsigned I = First; I <= Last; ++I) {
     assert(Clusters[I].Kind == CC_Range);
     Prob += Clusters[I].Prob;
     const APInt &Low = Clusters[I].Low->getValue();
     const APInt &High = Clusters[I].High->getValue();
-    NumCmps += (Low == High) ? 1 : 2;
+    unsigned int NumCmp = (Low == High) ? 1 : 2;
+    const BasicBlock *BB = Clusters[I].MBB->getBasicBlock();
+    DestMap[BB] += NumCmp;
+
     if (I != First) {
       // Fill the gap between this and the previous cluster.
       const APInt &PreviousHigh = Clusters[I - 1].High->getValue();
@@ -226,9 +229,7 @@ bool SwitchCG::SwitchLowering::buildJumpTable(const CaseClusterVector &Clusters,
     JTProbs[Clusters[I].MBB] += Clusters[I].Prob;
   }
 
-  unsigned NumDests = JTProbs.size();
-  if (TLI->isSuitableForBitTests(NumDests, NumCmps,
-                                 Clusters[First].Low->getValue(),
+  if (TLI->isSuitableForBitTests(DestMap, Clusters[First].Low->getValue(),
                                  Clusters[Last].High->getValue(), *DL)) {
     // Clusters[First..Last] should be lowered as bit tests instead.
     return false;
@@ -372,20 +373,19 @@ bool SwitchCG::SwitchLowering::buildBitTests(CaseClusterVector &Clusters,
   if (First == Last)
     return false;
 
-  BitVector Dests(FuncInfo.MF->getNumBlockIDs());
-  unsigned NumCmps = 0;
+  DenseMap<const BasicBlock *, unsigned int> DestMap;
   for (int64_t I = First; I <= Last; ++I) {
     assert(Clusters[I].Kind == CC_Range);
-    Dests.set(Clusters[I].MBB->getNumber());
-    NumCmps += (Clusters[I].Low == Clusters[I].High) ? 1 : 2;
+    unsigned NumCmp = (Clusters[I].Low == Clusters[I].High) ? 1 : 2;
+    const BasicBlock *BB = Clusters[I].MBB->getBasicBlock();
+    DestMap[BB] += NumCmp;
   }
 
-  unsigned NumDests = Dests.count();
   APInt Low = Clusters[First].Low->getValue();
   APInt High = Clusters[Last].High->getValue();
   assert(Low.slt(High));
 
-  if (!TLI->isSuitableForBitTests(NumDests, NumCmps, Low, High, *DL))
+  if (!TLI->isSuitableForBitTests(DestMap, Low, High, *DL))
     return false;
 
   APInt LowBound;
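Note: a simplified model of the new DestMap bookkeeping in SwitchLoweringUtils
above (std::map and std::string stand in for DenseMap and BasicBlock; not LLVM
code). Each case range costs one comparison if it covers a single value and two
if it is a true range, and the counts are now kept per destination block rather
than as one global NumDests/NumCmps pair:

    #include <cstdio>
    #include <map>
    #include <string>
    #include <vector>

    struct Cluster { long Low, High; std::string Dest; };

    int main() {
      std::vector<Cluster> Clusters = {
          {0, 0, "bb1"}, {1, 3, "bb2"}, {4, 4, "bb2"}};
      std::map<std::string, unsigned> DestMap;
      for (const Cluster &C : Clusters)
        DestMap[C.Dest] += (C.Low == C.High) ? 1 : 2; // per-destination count
      for (const auto &[Dest, Cmps] : DestMap)
        std::printf("%s absorbs %u comparisons\n", Dest.c_str(), Cmps);
    }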
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 59798b3..b3535eac 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
@@ -90,6 +91,11 @@ static cl::opt<unsigned> OptsizeJumpTableDensity(
     cl::desc("Minimum density for building a jump table in "
              "an optsize function"));
 
+static cl::opt<unsigned> MinimumBitTestCmpsOverride(
+    "min-bit-test-cmps", cl::init(2), cl::Hidden,
+    cl::desc("Set minimum of largest number of comparisons "
+             "to use bit test for switch."));
+
 // FIXME: This option is only to test if the strict fp operation processed
 // correctly by preventing mutating strict fp operation to normal fp operation
 // during development. When the backend supports strict float operation, this
@@ -428,6 +434,11 @@ RTLIB::Libcall RTLIB::getSINCOSPI(EVT RetVT) {
                       SINCOSPI_F128, SINCOSPI_PPCF128);
 }
 
+RTLIB::Libcall RTLIB::getSINCOS_STRET(EVT RetVT) {
+  return getFPLibCall(RetVT, SINCOS_STRET_F32, SINCOS_STRET_F64,
+                      UNKNOWN_LIBCALL, UNKNOWN_LIBCALL, UNKNOWN_LIBCALL);
+}
+
 RTLIB::Libcall RTLIB::getMODF(EVT RetVT) {
   return getFPLibCall(RetVT, MODF_F32, MODF_F64, MODF_F80, MODF_F128,
                       MODF_PPCF128);
@@ -719,6 +730,8 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm)
 
   MinCmpXchgSizeInBits = 0;
   SupportsUnalignedAtomics = false;
+
+  MinimumBitTestCmps = MinimumBitTestCmpsOverride;
 }
 
 // Define the virtual destructor out-of-line to act as a key method to anchor
@@ -2129,6 +2142,14 @@ bool TargetLoweringBase::isJumpTableRelative() const {
   return getTargetMachine().isPositionIndependent();
 }
 
+unsigned TargetLoweringBase::getMinimumBitTestCmps() const {
+  return MinimumBitTestCmps;
+}
+
+void TargetLoweringBase::setMinimumBitTestCmps(unsigned Val) {
+  MinimumBitTestCmps = Val;
+}
+
 Align TargetLoweringBase::getPrefLoopAlignment(MachineLoop *ML) const {
   if (TM.Options.LoopAlignment)
     return Align(TM.Options.LoopAlignment);
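Note: the isSuitableForBitTests overload that takes the per-destination DestMap
lives outside this directory, so the exact use of MinimumBitTestCmps is not
visible in this diff. The sketch below is an assumption for illustration only
(not the actual TargetLowering code): a plausible policy is to require that at
least one destination merge MinimumBitTestCmps or more comparisons before bit
tests are considered, with the default threshold coming from the new
-min-bit-test-cmps option:

    #include <map>
    #include <string>

    // Hypothetical profitability check over per-destination comparison counts.
    static bool worthBitTests(const std::map<std::string, unsigned> &DestMap,
                              unsigned MinimumBitTestCmps) {
      for (const auto &[Dest, Cmps] : DestMap)
        if (Cmps >= MinimumBitTestCmps)
          return true;
      return false;
    }

    int main() {
      std::map<std::string, unsigned> DestMap = {{"bb1", 1}, {"bb2", 3}};
      return worthBitTests(DestMap, /*MinimumBitTestCmps=*/2) ? 0 : 1;
    }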
