path: root/llvm/lib/CodeGen
author     mingmingl <mingmingl@google.com>  2025-02-04 11:11:14 -0800
committer  mingmingl <mingmingl@google.com>  2025-02-04 11:11:14 -0800
commit     e91747a92d27ecf799427bf563f9f64f7c4d2447 (patch)
tree       7aa5a8a9170deec293e152bdf2be804399dcd612 /llvm/lib/CodeGen
parent     3a8d9337d816aef41c3ca1484be8b933a71a3c46 (diff)
parent     53d6e59b594639417cdbfcfa2d18cea64acb4009 (diff)
Merge branch 'main' into users/mingmingl-llvm/spr/sdpglobalvariable
Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp          70
-rw-r--r--  llvm/lib/CodeGen/CodeGen.cpp                               2
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp               4
-rw-r--r--  llvm/lib/CodeGen/MachineCopyPropagation.cpp               68
-rw-r--r--  llvm/lib/CodeGen/MachineUniformityAnalysis.cpp             4
-rw-r--r--  llvm/lib/CodeGen/ReachingDefAnalysis.cpp                  19
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp            103
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp    59
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp          74
9 files changed, 243 insertions, 160 deletions
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 2f96366..6cf05fd 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -33,6 +33,7 @@
#include "llvm/MC/MCSymbolWasm.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/NVPTXAddrSpace.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -75,6 +76,26 @@ static dwarf::Tag GetCompileUnitType(UnitKind Kind, DwarfDebug *DW) {
return dwarf::DW_TAG_compile_unit;
}
+/// Translate an NVVM IR address space code to the corresponding DWARF value.
+static unsigned translateToNVVMDWARFAddrSpace(unsigned AddrSpace) {
+ switch (AddrSpace) {
+ case NVPTXAS::ADDRESS_SPACE_GENERIC:
+ return NVPTXAS::DWARF_ADDR_generic_space;
+ case NVPTXAS::ADDRESS_SPACE_GLOBAL:
+ return NVPTXAS::DWARF_ADDR_global_space;
+ case NVPTXAS::ADDRESS_SPACE_SHARED:
+ return NVPTXAS::DWARF_ADDR_shared_space;
+ case NVPTXAS::ADDRESS_SPACE_CONST:
+ return NVPTXAS::DWARF_ADDR_const_space;
+ case NVPTXAS::ADDRESS_SPACE_LOCAL:
+ return NVPTXAS::DWARF_ADDR_local_space;
+ default:
+ llvm_unreachable(
+ "Cannot translate unknown address space to DWARF address space");
+ return AddrSpace;
+ }
+}
+
DwarfCompileUnit::DwarfCompileUnit(unsigned UID, const DICompileUnit *Node,
AsmPrinter *A, DwarfDebug *DW,
DwarfFile *DWU, UnitKind Kind)
@@ -264,14 +285,11 @@ void DwarfCompileUnit::addLocationAttribute(
}
if (Expr) {
- // According to
- // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
- // cuda-gdb requires DW_AT_address_class for all variables to be able to
- // correctly interpret address space of the variable address.
+ // cuda-gdb special requirement. See NVPTXAS::DWARF_AddressSpace
// Decode DW_OP_constu <DWARF Address Space> DW_OP_swap DW_OP_xderef
- // sequence for the NVPTX + gdb target.
- unsigned LocalNVPTXAddressSpace;
+ // sequence to specify corresponding address space.
if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) {
+ unsigned LocalNVPTXAddressSpace;
const DIExpression *NewExpr =
DIExpression::extractAddressClass(Expr, LocalNVPTXAddressSpace);
if (NewExpr != Expr) {
@@ -363,6 +381,10 @@ void DwarfCompileUnit::addLocationAttribute(
DD->addArangeLabel(SymbolCU(this, Sym));
addOpAddress(*Loc, Sym);
}
+ if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB() &&
+ !NVPTXAddressSpace)
+ NVPTXAddressSpace =
+ translateToNVVMDWARFAddrSpace(Global->getType()->getAddressSpace());
}
// Global variables attached to symbols are memory locations.
// It would be better if this were unconditional, but malformed input that
@@ -373,13 +395,9 @@ void DwarfCompileUnit::addLocationAttribute(
DwarfExpr->addExpression(Expr);
}
if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) {
- // According to
- // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
- // cuda-gdb requires DW_AT_address_class for all variables to be able to
- // correctly interpret address space of the variable address.
- const unsigned NVPTX_ADDR_global_space = 5;
+ // cuda-gdb special requirement. See NVPTXAS::DWARF_AddressSpace
addUInt(*VariableDIE, dwarf::DW_AT_address_class, dwarf::DW_FORM_data1,
- NVPTXAddressSpace.value_or(NVPTX_ADDR_global_space));
+ NVPTXAddressSpace.value_or(NVPTXAS::DWARF_ADDR_global_space));
}
if (Loc)
addBlock(*VariableDIE, dwarf::DW_AT_location, DwarfExpr->finalize());
@@ -793,10 +811,10 @@ void DwarfCompileUnit::applyConcreteDbgVariableAttributes(
const DbgValueLoc *DVal = &Single.getValueLoc();
if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB() &&
!Single.getExpr()) {
- // Lack of expression means it is a register. Registers for PTX need to
- // be marked with DW_AT_address_class = 2. See
- // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
- addUInt(VariableDie, dwarf::DW_AT_address_class, dwarf::DW_FORM_data1, 2);
+ // cuda-gdb special requirement. See NVPTXAS::DWARF_AddressSpace
+ // Lack of expression means it is a register.
+ addUInt(VariableDie, dwarf::DW_AT_address_class, dwarf::DW_FORM_data1,
+ NVPTXAS::DWARF_ADDR_reg_space);
}
if (!DVal->isVariadic()) {
const DbgValueLocEntry *Entry = DVal->getLocEntries().begin();
@@ -922,14 +940,11 @@ void DwarfCompileUnit::applyConcreteDbgVariableAttributes(const Loc::MMI &MMI,
SmallVector<uint64_t, 8> Ops;
TRI->getOffsetOpcodes(Offset, Ops);
- // According to
- // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
- // cuda-gdb requires DW_AT_address_class for all variables to be
- // able to correctly interpret address space of the variable
- // address. Decode DW_OP_constu <DWARF Address Space> DW_OP_swap
- // DW_OP_xderef sequence for the NVPTX + gdb target.
- unsigned LocalNVPTXAddressSpace;
+ // cuda-gdb special requirement. See NVPTXAS::DWARF_AddressSpace.
+ // Decode DW_OP_constu <DWARF Address Space> DW_OP_swap
+ // DW_OP_xderef sequence to specify address space.
if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) {
+ unsigned LocalNVPTXAddressSpace;
const DIExpression *NewExpr =
DIExpression::extractAddressClass(Expr, LocalNVPTXAddressSpace);
if (NewExpr != Expr) {
@@ -949,14 +964,9 @@ void DwarfCompileUnit::applyConcreteDbgVariableAttributes(const Loc::MMI &MMI,
DwarfExpr.addExpression(std::move(Cursor));
}
if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) {
- // According to
- // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
- // cuda-gdb requires DW_AT_address_class for all variables to be
- // able to correctly interpret address space of the variable
- // address.
- const unsigned NVPTX_ADDR_local_space = 6;
+ // cuda-gdb special requirement. See NVPTXAS::DWARF_AddressSpace.
addUInt(VariableDie, dwarf::DW_AT_address_class, dwarf::DW_FORM_data1,
- NVPTXAddressSpace.value_or(NVPTX_ADDR_local_space));
+ NVPTXAddressSpace.value_or(NVPTXAS::DWARF_ADDR_local_space));
}
addBlock(VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
if (DwarfExpr.TagOffset)
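The DW_AT_address_class comments above now point at NVPTXAS::DWARF_AddressSpace instead of repeating the cuda-gdb documentation URL. That enum lives in llvm/Support/NVPTXAddrSpace.h, which is outside this diffstat; the sketch below is a reconstruction, where only reg = 2, global = 5, and local = 6 are confirmed by the hardcoded constants this patch replaces. The remaining values are assumptions taken from the DW_AT_address_class table in NVIDIA's PTX writers' guide and should be checked against the real header.

    // Hedged sketch of the DWARF address-class codes assumed by the code
    // above; only the three values replacing literals in this diff are
    // confirmed here.
    namespace NVPTXAS {
    enum DWARF_AddressSpace : unsigned {
      DWARF_ADDR_code_space = 1,     // assumed (PTX writers' guide)
      DWARF_ADDR_reg_space = 2,      // confirmed: replaces the literal 2
      DWARF_ADDR_sreg_space = 3,     // assumed
      DWARF_ADDR_const_space = 4,    // assumed
      DWARF_ADDR_global_space = 5,   // confirmed: was NVPTX_ADDR_global_space
      DWARF_ADDR_local_space = 6,    // confirmed: was NVPTX_ADDR_local_space
      DWARF_ADDR_param_space = 7,    // assumed
      DWARF_ADDR_shared_space = 8,   // assumed
      DWARF_ADDR_generic_space = 12, // assumed
    };
    } // namespace NVPTXAS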
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 5f0c7ec9c..0a7937e 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -77,7 +77,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeMachineCFGPrinterPass(Registry);
initializeMachineCSELegacyPass(Registry);
initializeMachineCombinerPass(Registry);
- initializeMachineCopyPropagationPass(Registry);
+ initializeMachineCopyPropagationLegacyPass(Registry);
initializeMachineCycleInfoPrinterPassPass(Registry);
initializeMachineCycleInfoWrapperPassPass(Registry);
initializeMachineDominatorTreeWrapperPassPass(Registry);
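This rename tracks the class rename in MachineCopyPropagation.cpp further down: the INITIALIZE_PASS macro family generates an initializer named after the pass class, so the legacy wrapper gets a new entry point. A sketch of the generated hook (the exact declaration lives in InitializePasses.h, outside this diff, so treat its shape as an assumption):

    // INITIALIZE_PASS(MachineCopyPropagationLegacy, ...) expands to a
    // function named initialize<ClassName>Pass; renaming the class is what
    // forces this call-site update. Declared in namespace llvm:
    void initializeMachineCopyPropagationLegacyPass(PassRegistry &);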
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 3e43299..362d856 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2441,9 +2441,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return true;
}
case Intrinsic::invariant_start: {
- LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL);
- Register Undef = MRI->createGenericVirtualRegister(PtrTy);
- MIRBuilder.buildUndef(Undef);
+ MIRBuilder.buildUndef(getOrCreateVReg(CI));
return true;
}
case Intrinsic::invariant_end:
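The old lowering defined a freshly created scratch register and never defined the virtual register for the call's own result; the one-liner presumably exists so that uses of the intrinsic's return value see a defined vreg. An annotated restatement, with a hypothetical caller:

    // Hypothetical IR feeding this path:
    //   %inv = call ptr @llvm.invariant.start.p0(i64 8, ptr %p)
    //
    // Before: G_IMPLICIT_DEF went into an unused scratch register, so the
    // vreg mapped to %inv stayed undefined.
    // After: the call's own result register is the one defined.
    MIRBuilder.buildUndef(getOrCreateVReg(CI)); // one G_IMPLICIT_DEF for %inv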
diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index d44b064..460749a 100644
--- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -48,6 +48,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/MachineCopyPropagation.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
@@ -449,7 +450,7 @@ public:
}
};
-class MachineCopyPropagation : public MachineFunctionPass {
+class MachineCopyPropagation {
const TargetRegisterInfo *TRI = nullptr;
const TargetInstrInfo *TII = nullptr;
const MachineRegisterInfo *MRI = nullptr;
@@ -458,24 +459,10 @@ class MachineCopyPropagation : public MachineFunctionPass {
bool UseCopyInstr;
public:
- static char ID; // Pass identification, replacement for typeid
-
MachineCopyPropagation(bool CopyInstr = false)
- : MachineFunctionPass(ID), UseCopyInstr(CopyInstr || MCPUseCopyInstr) {
- initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
+ : UseCopyInstr(CopyInstr || MCPUseCopyInstr) {}
- MachineFunctionProperties getRequiredProperties() const override {
- return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::NoVRegs);
- }
+ bool run(MachineFunction &MF);
private:
typedef enum { DebugUse = false, RegularUse = true } DebugType;
@@ -510,13 +497,35 @@ private:
bool Changed = false;
};
+class MachineCopyPropagationLegacy : public MachineFunctionPass {
+ bool UseCopyInstr;
+
+public:
+ static char ID; // pass identification
+
+ MachineCopyPropagationLegacy(bool UseCopyInstr = false)
+ : MachineFunctionPass(ID), UseCopyInstr(UseCopyInstr) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+};
+
} // end anonymous namespace
-char MachineCopyPropagation::ID = 0;
+char MachineCopyPropagationLegacy::ID = 0;
-char &llvm::MachineCopyPropagationID = MachineCopyPropagation::ID;
+char &llvm::MachineCopyPropagationID = MachineCopyPropagationLegacy::ID;
-INITIALIZE_PASS(MachineCopyPropagation, DEBUG_TYPE,
+INITIALIZE_PASS(MachineCopyPropagationLegacy, DEBUG_TYPE,
"Machine Copy Propagation Pass", false, false)
void MachineCopyPropagation::ReadRegister(MCRegister Reg, MachineInstr &Reader,
@@ -1563,10 +1572,25 @@ void MachineCopyPropagation::EliminateSpillageCopies(MachineBasicBlock &MBB) {
Tracker.clear();
}
-bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
+bool MachineCopyPropagationLegacy::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
+ return MachineCopyPropagation(UseCopyInstr).run(MF);
+}
+
+PreservedAnalyses
+MachineCopyPropagationPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &) {
+ MFPropsModifier _(*this, MF);
+ if (!MachineCopyPropagation(UseCopyInstr).run(MF))
+ return PreservedAnalyses::all();
+ auto PA = getMachineFunctionPassPreservedAnalyses();
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
+
+bool MachineCopyPropagation::run(MachineFunction &MF) {
bool isSpillageCopyElimEnabled = false;
switch (EnableSpillageCopyElimination) {
case cl::BOU_UNSET:
@@ -1599,5 +1623,5 @@ bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
MachineFunctionPass *
llvm::createMachineCopyPropagationPass(bool UseCopyInstr = false) {
- return new MachineCopyPropagation(UseCopyInstr);
+ return new MachineCopyPropagationLegacy(UseCopyInstr);
}
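The MachineCopyPropagationPass::run definition above implies a matching declaration in llvm/include/llvm/CodeGen/MachineCopyPropagation.h (the new #include at the top of this file). That header is outside this lib/CodeGen-limited diffstat, so the following is a sketch inferred from the definitions here, not the committed text:

    // Assumed shape of the new-PM pass in MachineCopyPropagation.h.
    #include "llvm/CodeGen/MachinePassManager.h"

    class MachineCopyPropagationPass
        : public PassInfoMixin<MachineCopyPropagationPass> {
      bool UseCopyInstr;

    public:
      MachineCopyPropagationPass(bool UseCopyInstr = false)
          : UseCopyInstr(UseCopyInstr) {}

      PreservedAnalyses run(MachineFunction &MF,
                            MachineFunctionAnalysisManager &MFAM);

      // The MFPropsModifier in run() suggests the NoVRegs requirement
      // carries over from the legacy pass unchanged.
      MachineFunctionProperties getRequiredProperties() const {
        return MachineFunctionProperties().set(
            MachineFunctionProperties::Property::NoVRegs);
      }
    };

With this split, the legacy wrapper keeps only the boolean and the skipFunction bailout; both pass managers funnel through the shared MachineCopyPropagation::run.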
diff --git a/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp b/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
index a4b78c1..b5dc487 100644
--- a/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
+++ b/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
@@ -185,11 +185,11 @@ MachineUniformityAnalysisPass::MachineUniformityAnalysisPass()
}
INITIALIZE_PASS_BEGIN(MachineUniformityAnalysisPass, "machine-uniformity",
- "Machine Uniformity Info Analysis", true, true)
+ "Machine Uniformity Info Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_END(MachineUniformityAnalysisPass, "machine-uniformity",
- "Machine Uniformity Info Analysis", true, true)
+ "Machine Uniformity Info Analysis", false, true)
void MachineUniformityAnalysisPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
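The only functional change here is the fourth macro argument. INITIALIZE_PASS_BEGIN/_END share the INITIALIZE_PASS parameter list (passName, arg, name, cfg, analysis), where cfg marks a pass that inspects nothing but the CFG; uniformity also depends on instruction operands, which is presumably why the flag flips. Annotated form of the registration above, parameter names per llvm/PassSupport.h:

    INITIALIZE_PASS_BEGIN(MachineUniformityAnalysisPass, "machine-uniformity",
                          "Machine Uniformity Info Analysis",
                          /*cfg=*/false, /*is_analysis=*/true)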
diff --git a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
index fa60881..59ad9ff 100644
--- a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
+++ b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
@@ -147,16 +147,7 @@ void ReachingDefAnalysis::processDefs(MachineInstr *MI) {
assert(FrameIndex >= 0 && "Can't handle negative frame indices yet!");
if (!isFIDef(*MI, FrameIndex, TII))
continue;
- if (MBBFrameObjsReachingDefs.contains(MBBNumber)) {
- auto Frame2InstrIdx = MBBFrameObjsReachingDefs[MBBNumber];
- if (Frame2InstrIdx.count(FrameIndex - ObjectIndexBegin) > 0)
- Frame2InstrIdx[FrameIndex - ObjectIndexBegin].push_back(CurInstr);
- else
- Frame2InstrIdx[FrameIndex - ObjectIndexBegin] = {CurInstr};
- } else {
- MBBFrameObjsReachingDefs[MBBNumber] = {
- {FrameIndex - ObjectIndexBegin, {CurInstr}}};
- }
+ MBBFrameObjsReachingDefs[{MBBNumber, FrameIndex}].push_back(CurInstr);
}
if (!isValidRegDef(MO))
continue;
@@ -351,9 +342,13 @@ int ReachingDefAnalysis::getReachingDef(MachineInstr *MI, Register Reg) const {
int LatestDef = ReachingDefDefaultVal;
if (Reg.isStack()) {
+ // Check that there was a reaching def.
int FrameIndex = Reg.stackSlotIndex();
- for (int Def : MBBFrameObjsReachingDefs.lookup(MBBNumber).lookup(
- FrameIndex - ObjectIndexBegin)) {
+ auto Lookup = MBBFrameObjsReachingDefs.find({MBBNumber, FrameIndex});
+ if (Lookup == MBBFrameObjsReachingDefs.end())
+ return LatestDef;
+ auto &Defs = Lookup->second;
+ for (int Def : Defs) {
if (Def >= InstId)
break;
DefRes = Def;
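Keying one map on the (basic block number, frame index) pair removes both the hand-rolled two-level insertion and the FrameIndex - ObjectIndexBegin rebasing on the write and read paths. A standalone illustration of the operator[] idiom (std::map stands in for the LLVM container, whose exact type this diff does not show):

    #include <cassert>
    #include <map>
    #include <utility>
    #include <vector>

    int main() {
      std::map<std::pair<int, int>, std::vector<int>> ReachingDefs;

      // Before: look up MBBNumber, then FrameIndex, creating each level by
      // hand. After: one lookup keyed on the pair; operator[] value-
      // initializes the vector on first touch, so push_back just works.
      ReachingDefs[{/*MBBNumber=*/0, /*FrameIndex=*/3}].push_back(/*Instr=*/7);
      ReachingDefs[{0, 3}].push_back(9);

      assert(ReachingDefs[{0, 3}].size() == 2);
      return 0;
    }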
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 882d6015..8858c20 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -385,17 +385,6 @@ namespace {
bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
- /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
- /// load.
- ///
- /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
- /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
- /// \param EltNo index of the vector element to load.
- /// \param OriginalLoad load that EVE came from to be replaced.
- /// \returns EVE on success SDValue() on failure.
- SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
- SDValue EltNo,
- LoadSDNode *OriginalLoad);
void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
@@ -22719,81 +22708,6 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
- SDValue EltNo,
- LoadSDNode *OriginalLoad) {
- assert(OriginalLoad->isSimple());
-
- EVT ResultVT = EVE->getValueType(0);
- EVT VecEltVT = InVecVT.getVectorElementType();
-
- // If the vector element type is not a multiple of a byte then we are unable
- // to correctly compute an address to load only the extracted element as a
- // scalar.
- if (!VecEltVT.isByteSized())
- return SDValue();
-
- ISD::LoadExtType ExtTy =
- ResultVT.bitsGT(VecEltVT) ? ISD::EXTLOAD : ISD::NON_EXTLOAD;
- if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
- !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
- return SDValue();
-
- Align Alignment = OriginalLoad->getAlign();
- MachinePointerInfo MPI;
- SDLoc DL(EVE);
- if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
- int Elt = ConstEltNo->getZExtValue();
- unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
- MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
- Alignment = commonAlignment(Alignment, PtrOff);
- } else {
- // Discard the pointer info except the address space because the memory
- // operand can't represent this new access since the offset is variable.
- MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
- Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
- }
-
- unsigned IsFast = 0;
- if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
- OriginalLoad->getAddressSpace(), Alignment,
- OriginalLoad->getMemOperand()->getFlags(),
- &IsFast) ||
- !IsFast)
- return SDValue();
-
- SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
- InVecVT, EltNo);
-
- // We are replacing a vector load with a scalar load. The new load must have
- // identical memory op ordering to the original.
- SDValue Load;
- if (ResultVT.bitsGT(VecEltVT)) {
- // If the result type of vextract is wider than the load, then issue an
- // extending load instead.
- ISD::LoadExtType ExtType =
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT) ? ISD::ZEXTLOAD
- : ISD::EXTLOAD;
- Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
- NewPtr, MPI, VecEltVT, Alignment,
- OriginalLoad->getMemOperand()->getFlags(),
- OriginalLoad->getAAInfo());
- DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
- } else {
- // The result type is narrower or the same width as the vector element
- Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
- Alignment, OriginalLoad->getMemOperand()->getFlags(),
- OriginalLoad->getAAInfo());
- DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
- if (ResultVT.bitsLT(VecEltVT))
- Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
- else
- Load = DAG.getBitcast(ResultVT, Load);
- }
- ++OpsNarrowed;
- return Load;
-}
-
/// Transform a vector binary operation into a scalar binary operation by moving
/// the math/logic after an extract element of a vector.
static SDValue scalarizeExtractedBinOp(SDNode *ExtElt, SelectionDAG &DAG,
@@ -23272,8 +23186,13 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
ISD::isNormalLoad(VecOp.getNode()) &&
!Index->hasPredecessor(VecOp.getNode())) {
auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
- if (VecLoad && VecLoad->isSimple())
- return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
+ if (VecLoad && VecLoad->isSimple()) {
+ if (SDValue Scalarized = TLI.scalarizeExtractedVectorLoad(
+ ExtVT, SDLoc(N), VecVT, Index, VecLoad, DAG)) {
+ ++OpsNarrowed;
+ return Scalarized;
+ }
+ }
}
// Perform only after legalization to ensure build_vector / vector_shuffle
@@ -23361,7 +23280,13 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
if (Elt == -1)
return DAG.getUNDEF(LVT);
- return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
+ if (SDValue Scalarized =
+ TLI.scalarizeExtractedVectorLoad(LVT, DL, VecVT, Index, LN0, DAG)) {
+ ++OpsNarrowed;
+ return Scalarized;
+ }
+
+ return SDValue();
}
// Simplify (build_vec (ext )) to (bitcast (build_vec ))
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 625052b..f1a91a7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -566,6 +566,29 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
}
}
+ // TODO: Handle big endian
+ if (!NOutVT.isVector() && InOp.getValueType().isVector() &&
+ DAG.getDataLayout().isLittleEndian()) {
+ // Pad the vector operand with undef and cast to a wider integer.
+ EVT EltVT = InOp.getValueType().getVectorElementType();
+ TypeSize EltSize = EltVT.getSizeInBits();
+ TypeSize OutSize = NOutVT.getSizeInBits();
+
+ if (OutSize.hasKnownScalarFactor(EltSize)) {
+ unsigned NumEltsWithPadding = OutSize.getKnownScalarFactor(EltSize);
+ EVT WideVecVT =
+ EVT::getVectorVT(*DAG.getContext(), EltVT, NumEltsWithPadding);
+
+ if (isTypeLegal(WideVecVT)) {
+ SDValue Inserted = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVecVT,
+ DAG.getUNDEF(WideVecVT), InOp,
+ DAG.getVectorIdxConstant(0, dl));
+
+ return DAG.getNode(ISD::BITCAST, dl, NOutVT, Inserted);
+ }
+ }
+ }
+
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
CreateStackStoreLoad(InOp, OutVT));
}
@@ -2181,9 +2204,43 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) {
+ EVT OutVT = N->getValueType(0);
+ SDValue InOp = N->getOperand(0);
+ EVT InVT = InOp.getValueType();
+ EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
+ SDLoc dl(N);
+
+ switch (getTypeAction(InVT)) {
+ case TargetLowering::TypePromoteInteger: {
+ // TODO: Handle big endian
+ if (OutVT.isVector() && DAG.getDataLayout().isLittleEndian()) {
+ EVT EltVT = OutVT.getVectorElementType();
+ TypeSize EltSize = EltVT.getSizeInBits();
+ TypeSize NInSize = NInVT.getSizeInBits();
+
+ if (NInSize.hasKnownScalarFactor(EltSize)) {
+ unsigned NumEltsWithPadding = NInSize.getKnownScalarFactor(EltSize);
+ EVT WideVecVT =
+ EVT::getVectorVT(*DAG.getContext(), EltVT, NumEltsWithPadding);
+
+ if (isTypeLegal(WideVecVT)) {
+ SDValue Promoted = GetPromotedInteger(InOp);
+ SDValue Cast = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Promoted);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OutVT, Cast,
+ DAG.getVectorIdxConstant(0, dl));
+ }
+ }
+ }
+
+ break;
+ }
+ default:
+ break;
+ }
+
// This should only occur in unusual situations like bitcasting to an
// x86_fp80, so just turn it into a store+load
- return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0));
+ return CreateStackStoreLoad(InOp, OutVT);
}
SDValue DAGTypeLegalizer::PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo) {
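Both new paths (pad-then-bitcast in PromoteIntRes_BITCAST, bitcast-then-extract in PromoteIntOp_BITCAST) lean on the same little-endian fact: padding elements sit at the high end of the wide integer, so the original payload occupies the low bits, consistent with ANY_EXTEND. A standalone check with made-up types, <2 x i8> bitcast to i16 on a target where i16 promotes to i32:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      uint8_t Vec2[2] = {0xAB, 0xCD};       // original <2 x i8>
      uint8_t Wide[4] = {0, 0, 0xEE, 0xEE}; // <4 x i8>; tail = undef padding
      std::memcpy(Wide, Vec2, sizeof(Vec2)); // INSERT_SUBVECTOR at index 0

      uint32_t Out;                          // BITCAST <4 x i8> -> i32
      std::memcpy(&Out, Wide, sizeof(Out));

      // On a little-endian host the low 16 bits are the original payload,
      // i.e. an any-extended i16.
      assert((Out & 0xFFFF) == 0xCDAB);
      return 0;
    }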
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 98206b7..adfb960 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -12114,3 +12114,77 @@ SDValue TargetLowering::expandVectorNaryOpBySplitting(SDNode *Node,
SDValue SplitOpHi = DAG.getNode(Opcode, DL, HiVT, HiOps);
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SplitOpLo, SplitOpHi);
}
+
+SDValue TargetLowering::scalarizeExtractedVectorLoad(EVT ResultVT,
+ const SDLoc &DL,
+ EVT InVecVT, SDValue EltNo,
+ LoadSDNode *OriginalLoad,
+ SelectionDAG &DAG) const {
+ assert(OriginalLoad->isSimple());
+
+ EVT VecEltVT = InVecVT.getVectorElementType();
+
+ // If the vector element type is not a multiple of a byte then we are unable
+ // to correctly compute an address to load only the extracted element as a
+ // scalar.
+ if (!VecEltVT.isByteSized())
+ return SDValue();
+
+ ISD::LoadExtType ExtTy =
+ ResultVT.bitsGT(VecEltVT) ? ISD::EXTLOAD : ISD::NON_EXTLOAD;
+ if (!isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
+ !shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
+ return SDValue();
+
+ Align Alignment = OriginalLoad->getAlign();
+ MachinePointerInfo MPI;
+ if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
+ int Elt = ConstEltNo->getZExtValue();
+ unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
+ MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
+ Alignment = commonAlignment(Alignment, PtrOff);
+ } else {
+ // Discard the pointer info except the address space because the memory
+ // operand can't represent this new access since the offset is variable.
+ MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
+ Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
+ }
+
+ unsigned IsFast = 0;
+ if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
+ OriginalLoad->getAddressSpace(), Alignment,
+ OriginalLoad->getMemOperand()->getFlags(), &IsFast) ||
+ !IsFast)
+ return SDValue();
+
+ SDValue NewPtr =
+ getVectorElementPointer(DAG, OriginalLoad->getBasePtr(), InVecVT, EltNo);
+
+ // We are replacing a vector load with a scalar load. The new load must have
+ // identical memory op ordering to the original.
+ SDValue Load;
+ if (ResultVT.bitsGT(VecEltVT)) {
+ // If the result type of vextract is wider than the load, then issue an
+ // extending load instead.
+ ISD::LoadExtType ExtType = isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT)
+ ? ISD::ZEXTLOAD
+ : ISD::EXTLOAD;
+ Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
+ NewPtr, MPI, VecEltVT, Alignment,
+ OriginalLoad->getMemOperand()->getFlags(),
+ OriginalLoad->getAAInfo());
+ DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
+ } else {
+ // The result type is narrower or the same width as the vector element
+ Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
+ Alignment, OriginalLoad->getMemOperand()->getFlags(),
+ OriginalLoad->getAAInfo());
+ DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
+ if (ResultVT.bitsLT(VecEltVT))
+ Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
+ else
+ Load = DAG.getBitcast(ResultVT, Load);
+ }
+
+ return Load;
+}
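For a constant index the new-pointer computation is plain byte arithmetic. A standalone check of the offset and alignment the narrowed load can claim, using a hypothetical v4i32 load aligned to 16 and an extract at index 2 (commonAlignment of a power-of-two alignment and an offset is the greatest power of two dividing both):

    #include <algorithm>
    #include <cstdio>

    int main() {
      unsigned VecEltBits = 32; // VecEltVT = i32
      unsigned Elt = 2;         // constant extract_vector_elt index
      unsigned VecAlign = 16;   // original v4i32 load alignment

      // PtrOff = VecEltVT.getSizeInBits() * Elt / 8, as in the code above.
      unsigned PtrOff = VecEltBits * Elt / 8; // 8 bytes

      // commonAlignment(Align, Offset): the alignment capped by the lowest
      // set bit of the offset (offset 0 keeps the original alignment).
      unsigned NewAlign =
          PtrOff ? std::min(VecAlign, PtrOff & (0u - PtrOff)) : VecAlign;

      std::printf("offset=%u align=%u\n", PtrOff, NewAlign); // offset=8 align=8
      return 0;
    }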