author     mingmingl <mingmingl@google.com>    2025-02-04 11:11:14 -0800
committer  mingmingl <mingmingl@google.com>    2025-02-04 11:11:14 -0800
commit     e91747a92d27ecf799427bf563f9f64f7c4d2447 (patch)
tree       7aa5a8a9170deec293e152bdf2be804399dcd612 /llvm/lib/CodeGen
parent     3a8d9337d816aef41c3ca1484be8b933a71a3c46 (diff)
parent     53d6e59b594639417cdbfcfa2d18cea64acb4009 (diff)
Merge branch 'main' into users/mingmingl-llvm/spr/sdpglobalvariable
Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp        |  70
-rw-r--r--  llvm/lib/CodeGen/CodeGen.cpp                            |   2
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp            |   4
-rw-r--r--  llvm/lib/CodeGen/MachineCopyPropagation.cpp             |  68
-rw-r--r--  llvm/lib/CodeGen/MachineUniformityAnalysis.cpp          |   4
-rw-r--r--  llvm/lib/CodeGen/ReachingDefAnalysis.cpp                |  19
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp           | 103
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp  |  59
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp        |  74
9 files changed, 243 insertions, 160 deletions
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 2f96366..6cf05fd 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -33,6 +33,7 @@
 #include "llvm/MC/MCSymbolWasm.h"
 #include "llvm/MC/MachineLocation.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/NVPTXAddrSpace.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
@@ -75,6 +76,26 @@ static dwarf::Tag GetCompileUnitType(UnitKind Kind, DwarfDebug *DW) {
   return dwarf::DW_TAG_compile_unit;
 }
 
+/// Translate NVVM IR address space code to DWARF correspondent value
+static unsigned translateToNVVMDWARFAddrSpace(unsigned AddrSpace) {
+  switch (AddrSpace) {
+  case NVPTXAS::ADDRESS_SPACE_GENERIC:
+    return NVPTXAS::DWARF_ADDR_generic_space;
+  case NVPTXAS::ADDRESS_SPACE_GLOBAL:
+    return NVPTXAS::DWARF_ADDR_global_space;
+  case NVPTXAS::ADDRESS_SPACE_SHARED:
+    return NVPTXAS::DWARF_ADDR_shared_space;
+  case NVPTXAS::ADDRESS_SPACE_CONST:
+    return NVPTXAS::DWARF_ADDR_const_space;
+  case NVPTXAS::ADDRESS_SPACE_LOCAL:
+    return NVPTXAS::DWARF_ADDR_local_space;
+  default:
+    llvm_unreachable(
+        "Cannot translate unknown address space to DWARF address space");
+    return AddrSpace;
+  }
+}
+
 DwarfCompileUnit::DwarfCompileUnit(unsigned UID, const DICompileUnit *Node,
                                    AsmPrinter *A, DwarfDebug *DW,
                                    DwarfFile *DWU, UnitKind Kind)
@@ -264,14 +285,11 @@ void DwarfCompileUnit::addLocationAttribute(
   }
 
   if (Expr) {
-    // According to
-    // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
-    // cuda-gdb requires DW_AT_address_class for all variables to be able to
-    // correctly interpret address space of the variable address.
+    // cuda-gdb special requirement. See NVPTXAS::DWARF_AddressSpace
     // Decode DW_OP_constu <DWARF Address Space> DW_OP_swap DW_OP_xderef
-    // sequence for the NVPTX + gdb target.
-    unsigned LocalNVPTXAddressSpace;
+    // sequence to specify corresponding address space.
     if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) {
+      unsigned LocalNVPTXAddressSpace;
       const DIExpression *NewExpr =
           DIExpression::extractAddressClass(Expr, LocalNVPTXAddressSpace);
       if (NewExpr != Expr) {
@@ -363,6 +381,10 @@ void DwarfCompileUnit::addLocationAttribute(
       DD->addArangeLabel(SymbolCU(this, Sym));
       addOpAddress(*Loc, Sym);
     }
+    if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB() &&
+        !NVPTXAddressSpace)
+      NVPTXAddressSpace =
+          translateToNVVMDWARFAddrSpace(Global->getType()->getAddressSpace());
   }
   // Global variables attached to symbols are memory locations.
   // It would be better if this were unconditional, but malformed input that
@@ -373,13 +395,9 @@ void DwarfCompileUnit::addLocationAttribute(
     DwarfExpr->addExpression(Expr);
   }
   if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) {
-    // According to
-    // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
-    // cuda-gdb requires DW_AT_address_class for all variables to be able to
-    // correctly interpret address space of the variable address.
-    const unsigned NVPTX_ADDR_global_space = 5;
+    // cuda-gdb special requirement. See NVPTXAS::DWARF_AddressSpace
     addUInt(*VariableDIE, dwarf::DW_AT_address_class, dwarf::DW_FORM_data1,
-            NVPTXAddressSpace.value_or(NVPTX_ADDR_global_space));
+            NVPTXAddressSpace.value_or(NVPTXAS::DWARF_ADDR_global_space));
   }
   if (Loc)
     addBlock(*VariableDIE, dwarf::DW_AT_location, DwarfExpr->finalize());
@@ -793,10 +811,10 @@ void DwarfCompileUnit::applyConcreteDbgVariableAttributes(
   const DbgValueLoc *DVal = &Single.getValueLoc();
   if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB() &&
       !Single.getExpr()) {
-    // Lack of expression means it is a register. Registers for PTX need to
-    // be marked with DW_AT_address_class = 2. See
-    // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
-    addUInt(VariableDie, dwarf::DW_AT_address_class, dwarf::DW_FORM_data1, 2);
+    // cuda-gdb special requirement. See NVPTXAS::DWARF_AddressSpace
+    // Lack of expression means it is a register.
+    addUInt(VariableDie, dwarf::DW_AT_address_class, dwarf::DW_FORM_data1,
+            NVPTXAS::DWARF_ADDR_reg_space);
   }
   if (!DVal->isVariadic()) {
     const DbgValueLocEntry *Entry = DVal->getLocEntries().begin();
@@ -922,14 +940,11 @@ void DwarfCompileUnit::applyConcreteDbgVariableAttributes(const Loc::MMI &MMI,
         SmallVector<uint64_t, 8> Ops;
         TRI->getOffsetOpcodes(Offset, Ops);
 
-        // According to
-        // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
-        // cuda-gdb requires DW_AT_address_class for all variables to be
-        // able to correctly interpret address space of the variable
-        // address. Decode DW_OP_constu <DWARF Address Space> DW_OP_swap
-        // DW_OP_xderef sequence for the NVPTX + gdb target.
-        unsigned LocalNVPTXAddressSpace;
+        // cuda-gdb special requirement. See NVPTXAS::DWARF_AddressSpace.
+        // Decode DW_OP_constu <DWARF Address Space> DW_OP_swap
+        // DW_OP_xderef sequence to specify address space.
         if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) {
+          unsigned LocalNVPTXAddressSpace;
           const DIExpression *NewExpr =
               DIExpression::extractAddressClass(Expr, LocalNVPTXAddressSpace);
           if (NewExpr != Expr) {
@@ -949,14 +964,9 @@ void DwarfCompileUnit::applyConcreteDbgVariableAttributes(const Loc::MMI &MMI,
       DwarfExpr.addExpression(std::move(Cursor));
     }
     if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) {
-      // According to
-      // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
-      // cuda-gdb requires DW_AT_address_class for all variables to be
-      // able to correctly interpret address space of the variable
-      // address.
-      const unsigned NVPTX_ADDR_local_space = 6;
+      // cuda-gdb special requirement. See NVPTXAS::DWARF_AddressSpace.
       addUInt(VariableDie, dwarf::DW_AT_address_class, dwarf::DW_FORM_data1,
-              NVPTXAddressSpace.value_or(NVPTX_ADDR_local_space));
+              NVPTXAddressSpace.value_or(NVPTXAS::DWARF_ADDR_local_space));
     }
     addBlock(VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
     if (DwarfExpr.TagOffset)
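Note: the `NVPTXAS::DWARF_ADDR_*` names above replace the bare literals 2, 5, and 6 that the old code hard-coded at each call site. A minimal sketch of the enum this patch relies on, declared in the newly included `llvm/Support/NVPTXAddrSpace.h`: the values for reg (2), global (5), and local (6) are confirmed by the literals this diff deletes; the remaining entries are assumptions taken from the cuda-gdb PTX DWARF-extension table.

```cpp
// Sketch (not part of this diff): DWARF address classes understood by
// cuda-gdb. Values 2, 5, and 6 match the literals removed above; the other
// entries are assumed from NVIDIA's PTX writers' interoperability guide.
namespace llvm::NVPTXAS {
enum DWARF_AddressSpace : unsigned {
  DWARF_ADDR_code_space = 1,
  DWARF_ADDR_reg_space = 2,    // bare registers (no DIExpression)
  DWARF_ADDR_sreg_space = 3,
  DWARF_ADDR_const_space = 4,
  DWARF_ADDR_global_space = 5, // default for global variables
  DWARF_ADDR_local_space = 6,  // default for frame-index (local) storage
  DWARF_ADDR_param_space = 7,
  DWARF_ADDR_shared_space = 8,
  DWARF_ADDR_generic_space = 12,
};
} // namespace llvm::NVPTXAS
```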
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 5f0c7ec9c..0a7937e 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -77,7 +77,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeMachineCFGPrinterPass(Registry);
   initializeMachineCSELegacyPass(Registry);
   initializeMachineCombinerPass(Registry);
-  initializeMachineCopyPropagationPass(Registry);
+  initializeMachineCopyPropagationLegacyPass(Registry);
   initializeMachineCycleInfoPrinterPassPass(Registry);
   initializeMachineCycleInfoWrapperPassPass(Registry);
   initializeMachineDominatorTreeWrapperPassPass(Registry);
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 3e43299..362d856 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2441,9 +2441,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
     return true;
   }
   case Intrinsic::invariant_start: {
-    LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL);
-    Register Undef = MRI->createGenericVirtualRegister(PtrTy);
-    MIRBuilder.buildUndef(Undef);
+    MIRBuilder.buildUndef(getOrCreateVReg(CI));
     return true;
   }
   case Intrinsic::invariant_end:
diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index d44b064..460749a 100644
--- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -48,6 +48,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/MachineCopyPropagation.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
@@ -449,7 +450,7 @@ public:
   }
 };
 
-class MachineCopyPropagation : public MachineFunctionPass {
+class MachineCopyPropagation {
   const TargetRegisterInfo *TRI = nullptr;
   const TargetInstrInfo *TII = nullptr;
   const MachineRegisterInfo *MRI = nullptr;
@@ -458,24 +459,10 @@ class MachineCopyPropagation : public MachineFunctionPass {
   bool UseCopyInstr;
 
 public:
-  static char ID; // Pass identification, replacement for typeid
-
   MachineCopyPropagation(bool CopyInstr = false)
-      : MachineFunctionPass(ID), UseCopyInstr(CopyInstr || MCPUseCopyInstr) {
-    initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry());
-  }
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.setPreservesCFG();
-    MachineFunctionPass::getAnalysisUsage(AU);
-  }
-
-  bool runOnMachineFunction(MachineFunction &MF) override;
+      : UseCopyInstr(CopyInstr || MCPUseCopyInstr) {}
 
-  MachineFunctionProperties getRequiredProperties() const override {
-    return MachineFunctionProperties().set(
-        MachineFunctionProperties::Property::NoVRegs);
-  }
+  bool run(MachineFunction &MF);
 
 private:
   typedef enum { DebugUse = false, RegularUse = true } DebugType;
@@ -510,13 +497,35 @@ private:
   bool Changed = false;
 };
 
+class MachineCopyPropagationLegacy : public MachineFunctionPass {
+  bool UseCopyInstr;
+
+public:
+  static char ID; // pass identification
+
+  MachineCopyPropagationLegacy(bool UseCopyInstr = false)
+      : MachineFunctionPass(ID), UseCopyInstr(UseCopyInstr) {}
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  MachineFunctionProperties getRequiredProperties() const override {
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::NoVRegs);
+  }
+};
+
 } // end anonymous namespace
 
-char MachineCopyPropagation::ID = 0;
+char MachineCopyPropagationLegacy::ID = 0;
 
-char &llvm::MachineCopyPropagationID = MachineCopyPropagation::ID;
+char &llvm::MachineCopyPropagationID = MachineCopyPropagationLegacy::ID;
 
-INITIALIZE_PASS(MachineCopyPropagation, DEBUG_TYPE,
+INITIALIZE_PASS(MachineCopyPropagationLegacy, DEBUG_TYPE,
                 "Machine Copy Propagation Pass", false, false)
 
 void MachineCopyPropagation::ReadRegister(MCRegister Reg, MachineInstr &Reader,
@@ -1563,10 +1572,25 @@ void MachineCopyPropagation::EliminateSpillageCopies(MachineBasicBlock &MBB) {
   Tracker.clear();
 }
 
-bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
+bool MachineCopyPropagationLegacy::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(MF.getFunction()))
     return false;
 
+  return MachineCopyPropagation(UseCopyInstr).run(MF);
+}
+
+PreservedAnalyses
+MachineCopyPropagationPass::run(MachineFunction &MF,
+                                MachineFunctionAnalysisManager &) {
+  MFPropsModifier _(*this, MF);
+  if (!MachineCopyPropagation(UseCopyInstr).run(MF))
+    return PreservedAnalyses::all();
+  auto PA = getMachineFunctionPassPreservedAnalyses();
+  PA.preserveSet<CFGAnalyses>();
+  return PA;
+}
+
+bool MachineCopyPropagation::run(MachineFunction &MF) {
  bool isSpillageCopyElimEnabled = false;
  switch (EnableSpillageCopyElimination) {
  case cl::BOU_UNSET:
@@ -1599,5 +1623,5 @@ bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
 
 MachineFunctionPass *
 llvm::createMachineCopyPropagationPass(bool UseCopyInstr = false) {
-  return new MachineCopyPropagation(UseCopyInstr);
+  return new MachineCopyPropagationLegacy(UseCopyInstr);
 }
diff --git a/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp b/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
index a4b78c1..b5dc487 100644
--- a/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
+++ b/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
@@ -185,11 +185,11 @@ MachineUniformityAnalysisPass::MachineUniformityAnalysisPass()
 }
 
 INITIALIZE_PASS_BEGIN(MachineUniformityAnalysisPass, "machine-uniformity",
-                      "Machine Uniformity Info Analysis", true, true)
+                      "Machine Uniformity Info Analysis", false, true)
 INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
 INITIALIZE_PASS_END(MachineUniformityAnalysisPass, "machine-uniformity",
-                    "Machine Uniformity Info Analysis", true, true)
+                    "Machine Uniformity Info Analysis", false, true)
 
 void MachineUniformityAnalysisPass::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesAll();
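Note: the MachineCopyPropagation change above is the standard recipe for porting a machine pass to the new pass manager: the transform logic moves into a plain class with a `run(MachineFunction &)` method, the legacy pass shrinks to a thin wrapper, and preservation is reported through `PreservedAnalyses` instead of `getAnalysisUsage`. A distilled, self-contained sketch of the pattern; the `Foo*` names are placeholders, not part of this patch, and registration boilerplate is omitted:

```cpp
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachinePassManager.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;

namespace {
// Shared implementation that both pass managers drive.
class FooImpl {
public:
  bool run(MachineFunction &MF) {
    return false; // placeholder: return true iff the function changed
  }
};

// Legacy-PM wrapper: bookkeeping stays in the familiar virtual overrides.
class FooLegacy : public MachineFunctionPass {
public:
  static char ID;
  FooLegacy() : MachineFunctionPass(ID) {}
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
  bool runOnMachineFunction(MachineFunction &MF) override {
    if (skipFunction(MF.getFunction()))
      return false;
    return FooImpl().run(MF);
  }
};
char FooLegacy::ID = 0;

// New-PM wrapper: preservation is *returned* instead of declared up front.
class FooPass : public PassInfoMixin<FooPass> {
public:
  PreservedAnalyses run(MachineFunction &MF,
                        MachineFunctionAnalysisManager &) {
    if (!FooImpl().run(MF))
      return PreservedAnalyses::all();
    auto PA = getMachineFunctionPassPreservedAnalyses();
    PA.preserveSet<CFGAnalyses>(); // counterpart of setPreservesCFG()
    return PA;
  }
};
} // end anonymous namespace
```

In the real patch, the `MFPropsModifier` guard additionally asserts the required `NoVRegs` property on the new-PM path, mirroring `getRequiredProperties()` on the legacy side.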
diff --git a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
index fa60881..59ad9ff 100644
--- a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
+++ b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
@@ -147,16 +147,7 @@ void ReachingDefAnalysis::processDefs(MachineInstr *MI) {
       assert(FrameIndex >= 0 && "Can't handle negative frame indicies yet!");
       if (!isFIDef(*MI, FrameIndex, TII))
         continue;
-      if (MBBFrameObjsReachingDefs.contains(MBBNumber)) {
-        auto Frame2InstrIdx = MBBFrameObjsReachingDefs[MBBNumber];
-        if (Frame2InstrIdx.count(FrameIndex - ObjectIndexBegin) > 0)
-          Frame2InstrIdx[FrameIndex - ObjectIndexBegin].push_back(CurInstr);
-        else
-          Frame2InstrIdx[FrameIndex - ObjectIndexBegin] = {CurInstr};
-      } else {
-        MBBFrameObjsReachingDefs[MBBNumber] = {
-            {FrameIndex - ObjectIndexBegin, {CurInstr}}};
-      }
+      MBBFrameObjsReachingDefs[{MBBNumber, FrameIndex}].push_back(CurInstr);
     }
     if (!isValidRegDef(MO))
       continue;
@@ -351,9 +342,13 @@ int ReachingDefAnalysis::getReachingDef(MachineInstr *MI, Register Reg) const {
   int LatestDef = ReachingDefDefaultVal;
 
   if (Reg.isStack()) {
+    // Check that there was a reaching def.
     int FrameIndex = Reg.stackSlotIndex();
-    for (int Def : MBBFrameObjsReachingDefs.lookup(MBBNumber).lookup(
-             FrameIndex - ObjectIndexBegin)) {
+    auto Lookup = MBBFrameObjsReachingDefs.find({MBBNumber, FrameIndex});
+    if (Lookup == MBBFrameObjsReachingDefs.end())
+      return LatestDef;
+    auto &Defs = Lookup->second;
+    for (int Def : Defs) {
       if (Def >= InstId)
         break;
       DefRes = Def;
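Note: two things make the ReachingDefAnalysis rewrite above work. First, `DenseMap::operator[]` value-initializes the mapped `SmallVector` on first access, so a single `push_back` covers both the new-key and existing-key cases. Second, the old code bound the inner map with `auto` (a copy, not a reference), so its `push_back` appears to have mutated a temporary; keying one flat map by the `{MBBNumber, FrameIndex}` pair sidesteps the nesting entirely. A minimal standalone illustration with stand-in types:

```cpp
// Minimal sketch: a flat DenseMap keyed by a pair replaces a map-of-maps.
// The key/value types are illustrative stand-ins for the analysis' own.
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include <utility>

int main() {
  // Key mirrors {basic-block number, frame index}; values are instruction ids.
  llvm::DenseMap<std::pair<int, int>, llvm::SmallVector<int, 4>> ReachingDefs;

  ReachingDefs[{/*MBB=*/0, /*FI=*/2}].push_back(7); // creates the vector
  ReachingDefs[{0, 2}].push_back(9);                // appends to the same one

  // Query side: find() avoids materializing an entry on a miss, matching
  // the early return added to getReachingDef().
  auto It = ReachingDefs.find({0, 3});
  return It == ReachingDefs.end() ? 0 : 1;
}
```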
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 882d6015..8858c20 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -385,17 +385,6 @@ namespace {
     bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
     bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
 
-    /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
-    /// load.
-    ///
-    /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
-    /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
-    /// \param EltNo index of the vector element to load.
-    /// \param OriginalLoad load that EVE came from to be replaced.
-    /// \returns EVE on success SDValue() on failure.
-    SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
-                                         SDValue EltNo,
-                                         LoadSDNode *OriginalLoad);
     void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
     SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
     SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
@@ -22719,81 +22708,6 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
   return SDValue();
 }
 
-SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
-                                                  SDValue EltNo,
-                                                  LoadSDNode *OriginalLoad) {
-  assert(OriginalLoad->isSimple());
-
-  EVT ResultVT = EVE->getValueType(0);
-  EVT VecEltVT = InVecVT.getVectorElementType();
-
-  // If the vector element type is not a multiple of a byte then we are unable
-  // to correctly compute an address to load only the extracted element as a
-  // scalar.
-  if (!VecEltVT.isByteSized())
-    return SDValue();
-
-  ISD::LoadExtType ExtTy =
-      ResultVT.bitsGT(VecEltVT) ? ISD::EXTLOAD : ISD::NON_EXTLOAD;
-  if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
-      !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
-    return SDValue();
-
-  Align Alignment = OriginalLoad->getAlign();
-  MachinePointerInfo MPI;
-  SDLoc DL(EVE);
-  if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
-    int Elt = ConstEltNo->getZExtValue();
-    unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
-    MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
-    Alignment = commonAlignment(Alignment, PtrOff);
-  } else {
-    // Discard the pointer info except the address space because the memory
-    // operand can't represent this new access since the offset is variable.
-    MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
-    Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
-  }
-
-  unsigned IsFast = 0;
-  if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
-                              OriginalLoad->getAddressSpace(), Alignment,
-                              OriginalLoad->getMemOperand()->getFlags(),
-                              &IsFast) ||
-      !IsFast)
-    return SDValue();
-
-  SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
-                                               InVecVT, EltNo);
-
-  // We are replacing a vector load with a scalar load. The new load must have
-  // identical memory op ordering to the original.
-  SDValue Load;
-  if (ResultVT.bitsGT(VecEltVT)) {
-    // If the result type of vextract is wider than the load, then issue an
-    // extending load instead.
-    ISD::LoadExtType ExtType =
-        TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT) ? ISD::ZEXTLOAD
-                                                              : ISD::EXTLOAD;
-    Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
-                          NewPtr, MPI, VecEltVT, Alignment,
-                          OriginalLoad->getMemOperand()->getFlags(),
-                          OriginalLoad->getAAInfo());
-    DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
-  } else {
-    // The result type is narrower or the same width as the vector element
-    Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
-                       Alignment, OriginalLoad->getMemOperand()->getFlags(),
-                       OriginalLoad->getAAInfo());
-    DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
-    if (ResultVT.bitsLT(VecEltVT))
-      Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
-    else
-      Load = DAG.getBitcast(ResultVT, Load);
-  }
-  ++OpsNarrowed;
-  return Load;
-}
-
 /// Transform a vector binary operation into a scalar binary operation by moving
 /// the math/logic after an extract element of a vector.
 static SDValue scalarizeExtractedBinOp(SDNode *ExtElt, SelectionDAG &DAG,
@@ -23272,8 +23186,13 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
       ISD::isNormalLoad(VecOp.getNode()) &&
       !Index->hasPredecessor(VecOp.getNode())) {
     auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
-    if (VecLoad && VecLoad->isSimple())
-      return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
+    if (VecLoad && VecLoad->isSimple()) {
+      if (SDValue Scalarized = TLI.scalarizeExtractedVectorLoad(
+              ExtVT, SDLoc(N), VecVT, Index, VecLoad, DAG)) {
+        ++OpsNarrowed;
+        return Scalarized;
+      }
+    }
   }
 
   // Perform only after legalization to ensure build_vector / vector_shuffle
@@ -23361,7 +23280,13 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
     if (Elt == -1)
       return DAG.getUNDEF(LVT);
 
-    return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
+    if (SDValue Scalarized =
+            TLI.scalarizeExtractedVectorLoad(LVT, DL, VecVT, Index, LN0, DAG)) {
+      ++OpsNarrowed;
+      return Scalarized;
+    }
+
+    return SDValue();
   }
 
   // Simplify (build_vec (ext )) to (bitcast (build_vec ))
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 625052b..f1a91a7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -566,6 +566,29 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
     }
   }
 
+  // TODO: Handle big endian
+  if (!NOutVT.isVector() && InOp.getValueType().isVector() &&
+      DAG.getDataLayout().isLittleEndian()) {
+    // Pad the vector operand with undef and cast to a wider integer.
+    EVT EltVT = InOp.getValueType().getVectorElementType();
+    TypeSize EltSize = EltVT.getSizeInBits();
+    TypeSize OutSize = NOutVT.getSizeInBits();
+
+    if (OutSize.hasKnownScalarFactor(EltSize)) {
+      unsigned NumEltsWithPadding = OutSize.getKnownScalarFactor(EltSize);
+      EVT WideVecVT =
+          EVT::getVectorVT(*DAG.getContext(), EltVT, NumEltsWithPadding);
+
+      if (isTypeLegal(WideVecVT)) {
+        SDValue Inserted = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVecVT,
+                                       DAG.getUNDEF(WideVecVT), InOp,
+                                       DAG.getVectorIdxConstant(0, dl));
+
+        return DAG.getNode(ISD::BITCAST, dl, NOutVT, Inserted);
+      }
+    }
+  }
+
   return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
                      CreateStackStoreLoad(InOp, OutVT));
 }
@@ -2181,9 +2204,43 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N) {
 }
 
 SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) {
+  EVT OutVT = N->getValueType(0);
+  SDValue InOp = N->getOperand(0);
+  EVT InVT = InOp.getValueType();
+  EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
+  SDLoc dl(N);
+
+  switch (getTypeAction(InVT)) {
+  case TargetLowering::TypePromoteInteger: {
+    // TODO: Handle big endian
+    if (OutVT.isVector() && DAG.getDataLayout().isLittleEndian()) {
+      EVT EltVT = OutVT.getVectorElementType();
+      TypeSize EltSize = EltVT.getSizeInBits();
+      TypeSize NInSize = NInVT.getSizeInBits();
+
+      if (NInSize.hasKnownScalarFactor(EltSize)) {
+        unsigned NumEltsWithPadding = NInSize.getKnownScalarFactor(EltSize);
+        EVT WideVecVT =
+            EVT::getVectorVT(*DAG.getContext(), EltVT, NumEltsWithPadding);
+
+        if (isTypeLegal(WideVecVT)) {
+          SDValue Promoted = GetPromotedInteger(InOp);
+          SDValue Cast = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Promoted);
+          return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OutVT, Cast,
+                             DAG.getVectorIdxConstant(0, dl));
+        }
+      }
+    }
+
+    break;
+  }
+  default:
+    break;
+  }
+
   // This should only occur in unusual situations like bitcasting to an
   // x86_fp80, so just turn it into a store+load
-  return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0));
+  return CreateStackStoreLoad(InOp, OutVT);
 }
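Note: both new legalizer paths above hinge on the same size check: the promoted integer width must be an exact multiple of the vector element width, so the value can travel through a wider legal vector type instead of a stack store/load. A worked instance of that arithmetic (the concrete types are illustrative; assumes an in-tree LLVM build):

```cpp
// Worked example of the factor computation: think of promoting
// "bitcast <4 x i16> to i64" where i64 is promoted to i128.
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  EVT EltVT = MVT::i16;   // element type of the vector operand
  EVT NOutVT = MVT::i128; // promoted scalar result type
  TypeSize EltSize = EltVT.getSizeInBits();  // 16 bits
  TypeSize OutSize = NOutVT.getSizeInBits(); // 128 bits

  if (OutSize.hasKnownScalarFactor(EltSize)) {
    // 128 / 16 == 8, so the carrier type is v8i16: the v4i16 input is
    // INSERT_SUBVECTORed at index 0 of an undef v8i16, and the whole
    // v8i16 is then bitcast to the promoted integer -- no stack temporary.
    unsigned NumEltsWithPadding = OutSize.getKnownScalarFactor(EltSize);
    EVT WideVecVT = EVT::getVectorVT(Ctx, EltVT, NumEltsWithPadding);
    (void)WideVecVT;
  }
  return 0;
}
```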
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 98206b7..adfb960 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -12114,3 +12114,77 @@ SDValue TargetLowering::expandVectorNaryOpBySplitting(SDNode *Node,
   SDValue SplitOpHi = DAG.getNode(Opcode, DL, HiVT, HiOps);
   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SplitOpLo, SplitOpHi);
 }
+
+SDValue TargetLowering::scalarizeExtractedVectorLoad(EVT ResultVT,
+                                                     const SDLoc &DL,
+                                                     EVT InVecVT, SDValue EltNo,
+                                                     LoadSDNode *OriginalLoad,
+                                                     SelectionDAG &DAG) const {
+  assert(OriginalLoad->isSimple());
+
+  EVT VecEltVT = InVecVT.getVectorElementType();
+
+  // If the vector element type is not a multiple of a byte then we are unable
+  // to correctly compute an address to load only the extracted element as a
+  // scalar.
+  if (!VecEltVT.isByteSized())
+    return SDValue();
+
+  ISD::LoadExtType ExtTy =
+      ResultVT.bitsGT(VecEltVT) ? ISD::EXTLOAD : ISD::NON_EXTLOAD;
+  if (!isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
+      !shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
+    return SDValue();
+
+  Align Alignment = OriginalLoad->getAlign();
+  MachinePointerInfo MPI;
+  if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
+    int Elt = ConstEltNo->getZExtValue();
+    unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
+    MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
+    Alignment = commonAlignment(Alignment, PtrOff);
+  } else {
+    // Discard the pointer info except the address space because the memory
+    // operand can't represent this new access since the offset is variable.
+    MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
+    Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
+  }
+
+  unsigned IsFast = 0;
+  if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
+                          OriginalLoad->getAddressSpace(), Alignment,
+                          OriginalLoad->getMemOperand()->getFlags(), &IsFast) ||
+      !IsFast)
+    return SDValue();
+
+  SDValue NewPtr =
+      getVectorElementPointer(DAG, OriginalLoad->getBasePtr(), InVecVT, EltNo);
+
+  // We are replacing a vector load with a scalar load. The new load must have
+  // identical memory op ordering to the original.
+  SDValue Load;
+  if (ResultVT.bitsGT(VecEltVT)) {
+    // If the result type of vextract is wider than the load, then issue an
+    // extending load instead.
+    ISD::LoadExtType ExtType = isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT)
+                                   ? ISD::ZEXTLOAD
+                                   : ISD::EXTLOAD;
+    Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
+                          NewPtr, MPI, VecEltVT, Alignment,
+                          OriginalLoad->getMemOperand()->getFlags(),
+                          OriginalLoad->getAAInfo());
+    DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
+  } else {
+    // The result type is narrower or the same width as the vector element
+    Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
+                       Alignment, OriginalLoad->getMemOperand()->getFlags(),
+                       OriginalLoad->getAAInfo());
+    DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
+    if (ResultVT.bitsLT(VecEltVT))
+      Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
+    else
+      Load = DAG.getBitcast(ResultVT, Load);
+  }
+
+  return Load;
+}
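Note: with the helper hoisted from DAGCombiner onto TargetLowering (the `++OpsNarrowed` statistic deliberately stayed behind at the DAGCombiner call sites), target code can reuse it. A hypothetical caller sketched against the signature added above; `MyTargetLowering` and `combineExtractElt` are placeholder names, not part of this patch:

```cpp
// Hypothetical target combine reusing the now-shared helper (sketch).
SDValue MyTargetLowering::combineExtractElt(SDNode *N,
                                            SelectionDAG &DAG) const {
  SDValue Vec = N->getOperand(0);
  SDValue Idx = N->getOperand(1);
  auto *Ld = dyn_cast<LoadSDNode>(Vec);
  if (!Ld || !Ld->isSimple()) // the helper asserts isSimple()
    return SDValue();
  // On failure the helper returns the empty SDValue(), so callers can
  // simply fall through to other combines.
  return scalarizeExtractedVectorLoad(N->getValueType(0), SDLoc(N),
                                      Vec.getValueType(), Idx, Ld, DAG);
}
```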