Diffstat (limited to 'llvm/lib/CodeGen')
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp             |   6
 llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp                |   2
 llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp       |   3
 llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp             |  15
 llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp        |   4
 llvm/lib/CodeGen/CodeGenPrepare.cpp                    |   2
 llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp          |   2
 llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp           |  44
 llvm/lib/CodeGen/GlobalISel/Utils.cpp                  |  22
 llvm/lib/CodeGen/RegisterCoalescer.cpp                 | 189
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp          | 126
 llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp |   5
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h          |   2
 llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp  |  68
 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp         |  39
 llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp  |  35
 llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h    |   2
 llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp       |  53
 llvm/lib/CodeGen/TargetInstrInfo.cpp                   |   2
 llvm/lib/CodeGen/TargetLoweringBase.cpp                |   3
 llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp      |  25
21 files changed, 394 insertions(+), 255 deletions(-)
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 1641c3e..c72b6e8 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -3194,7 +3194,7 @@ void AsmPrinter::emitJumpTableSizesSection(const MachineJumpTableInfo &MJTI,
return;
if (isElf) {
- MCSymbolELF *LinkedToSym = dyn_cast<MCSymbolELF>(CurrentFnSym);
+ auto *LinkedToSym = static_cast<MCSymbolELF *>(CurrentFnSym);
int Flags = F.hasComdat() ? static_cast<int>(ELF::SHF_GROUP) : 0;
JumpTableSizesSection = OutContext.getELFSection(
@@ -4702,7 +4702,7 @@ void AsmPrinter::emitXRayTable() {
const Triple &TT = TM.getTargetTriple();
// Use PC-relative addresses on all targets.
if (TT.isOSBinFormatELF()) {
- auto LinkedToSym = cast<MCSymbolELF>(CurrentFnSym);
+ auto LinkedToSym = static_cast<const MCSymbolELF *>(CurrentFnSym);
auto Flags = ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER;
StringRef GroupName;
if (F.hasComdat()) {
@@ -4825,7 +4825,7 @@ void AsmPrinter::emitPatchableFunctionEntries() {
Flags |= ELF::SHF_GROUP;
GroupName = F.getComdat()->getName();
}
- LinkedToSym = cast<MCSymbolELF>(CurrentFnSym);
+ LinkedToSym = static_cast<const MCSymbolELF *>(CurrentFnSym);
}
OutStreamer->switchSection(OutContext.getELFSection(
SectionName, ELF::SHT_PROGBITS, Flags, 0, GroupName, F.hasComdat(),
diff --git a/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
index 08ed78e..a7491a2 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -230,7 +230,7 @@ void DIEHash::hashBlockData(const DIE::const_value_range &Values) {
"Base types referenced from DW_OP_convert should have a name");
hashNestedType(C, Name);
} else
- Hash.update((uint64_t)V.getDIEInteger().getValue());
+ Hash.update(V.getDIEInteger().getValue());
}
// Hash the contents of a loclistptr class.
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 5577a7d..f9d7e76 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -508,7 +508,8 @@ void DwarfCompileUnit::addWasmRelocBaseGlobal(DIELoc *Loc, StringRef GlobalName,
// don't want to depend on target specific headers in this code?
const unsigned TI_GLOBAL_RELOC = 3;
unsigned PointerSize = Asm->getDataLayout().getPointerSize();
- auto *Sym = cast<MCSymbolWasm>(Asm->GetExternalSymbolSymbol(GlobalName));
+ auto *Sym =
+ static_cast<MCSymbolWasm *>(Asm->GetExternalSymbolSymbol(GlobalName));
// FIXME: this repeats what WebAssemblyMCInstLower::
// GetExternalSymbolSymbol does, since if there's no code that
// refers to this symbol, we have to set it here.
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 7188833..5ae2d2a 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -940,14 +940,23 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
// In the case of an indirect call find the register that holds
// the callee.
const MachineOperand &CalleeOp = TII->getCalleeOperand(MI);
- if (!CalleeOp.isGlobal() &&
- (!CalleeOp.isReg() || !CalleeOp.getReg().isPhysical()))
+ bool PhysRegCalleeOperand =
+ CalleeOp.isReg() && CalleeOp.getReg().isPhysical();
+ // Hack: WebAssembly CALL instructions have MCInstrDesc that does not
+ // describe the call target operand.
+ if (CalleeOp.getOperandNo() < MI.getDesc().operands().size()) {
+ const MCOperandInfo &MCOI =
+ MI.getDesc().operands()[CalleeOp.getOperandNo()];
+ PhysRegCalleeOperand =
+ PhysRegCalleeOperand && MCOI.OperandType == MCOI::OPERAND_REGISTER;
+ }
+ if (!CalleeOp.isGlobal() && !PhysRegCalleeOperand)
continue;
unsigned CallReg = 0;
const DISubprogram *CalleeSP = nullptr;
const Function *CalleeDecl = nullptr;
- if (CalleeOp.isReg()) {
+ if (PhysRegCalleeOperand) {
CallReg = CalleeOp.getReg();
if (!CallReg)
continue;
diff --git a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
index ff265b5..260ce8f 100644
--- a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
+++ b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
@@ -1403,7 +1403,7 @@ void AssignmentTrackingLowering::addMemDef(BlockInfo *LiveSet, VariableID Var,
const Assignment &AV) {
LiveSet->setAssignment(BlockInfo::Stack, Var, AV);
- // Use this assigment for all fragments contained within Var, but do not
+ // Use this assignment for all fragments contained within Var, but do not
// provide a Source because we cannot convert Var's value to a value for the
// fragment.
Assignment FragAV = AV;
@@ -1416,7 +1416,7 @@ void AssignmentTrackingLowering::addDbgDef(BlockInfo *LiveSet, VariableID Var,
const Assignment &AV) {
LiveSet->setAssignment(BlockInfo::Debug, Var, AV);
- // Use this assigment for all fragments contained within Var, but do not
+ // Use this assignment for all fragments contained within Var, but do not
// provide a Source because we cannot convert Var's value to a value for the
// fragment.
Assignment FragAV = AV;
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index f16283b..9223739 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -1834,7 +1834,7 @@ bool CodeGenPrepare::unfoldPowerOf2Test(CmpInst *Cmp) {
///
/// Return true if any changes are made.
static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
- if (TLI.hasMultipleConditionRegisters())
+ if (TLI.hasMultipleConditionRegisters(EVT::getEVT(Cmp->getType())))
return false;
// Avoid sinking soft-FP comparisons, since this can move them into a loop.
diff --git a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
index 0f2c580..59c62cf 100644
--- a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
@@ -95,7 +95,7 @@ void CSEMIRBuilder::profileSrcOp(const SrcOp &Op,
GISelInstProfileBuilder &B) const {
switch (Op.getSrcOpKind()) {
case SrcOp::SrcType::Ty_Imm:
- B.addNodeIDImmediate(static_cast<int64_t>(Op.getImm()));
+ B.addNodeIDImmediate(Op.getImm());
break;
case SrcOp::SrcType::Ty_Predicate:
B.addNodeIDImmediate(static_cast<int64_t>(Op.getPredicate()));
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index fd38c30..bbfae57 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1592,9 +1592,19 @@ bool IRTranslator::translateGetElementPtr(const User &U,
Type *OffsetIRTy = DL->getIndexType(PtrIRTy);
LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
- uint32_t Flags = 0;
+ uint32_t PtrAddFlags = 0;
+ // Each PtrAdd generated to implement the GEP inherits the GEP's nuw, nusw
+ // and inbounds flags.
if (const Instruction *I = dyn_cast<Instruction>(&U))
- Flags = MachineInstr::copyFlagsFromInstruction(*I);
+ PtrAddFlags = MachineInstr::copyFlagsFromInstruction(*I);
+
+ auto PtrAddFlagsWithConst = [&](int64_t Offset) {
+ // For nusw/inbounds GEP with an offset that is nonnegative when interpreted
+ // as signed, assume there is no unsigned overflow.
+ if (Offset >= 0 && (PtrAddFlags & MachineInstr::MIFlag::NoUSWrap))
+ return PtrAddFlags | MachineInstr::MIFlag::NoUWrap;
+ return PtrAddFlags;
+ };
// Normalize Vector GEP - all scalar operands should be converted to the
// splat vector.
@@ -1644,7 +1654,9 @@ bool IRTranslator::translateGetElementPtr(const User &U,
if (Offset != 0) {
auto OffsetMIB = MIRBuilder.buildConstant({OffsetTy}, Offset);
- BaseReg = MIRBuilder.buildPtrAdd(PtrTy, BaseReg, OffsetMIB.getReg(0))
+ BaseReg = MIRBuilder
+ .buildPtrAdd(PtrTy, BaseReg, OffsetMIB.getReg(0),
+ PtrAddFlagsWithConst(Offset))
.getReg(0);
Offset = 0;
}
@@ -1668,12 +1680,23 @@ bool IRTranslator::translateGetElementPtr(const User &U,
if (ElementSize != 1) {
auto ElementSizeMIB = MIRBuilder.buildConstant(
getLLTForType(*OffsetIRTy, *DL), ElementSize);
+
+ // The multiplication is NUW if the GEP is NUW and NSW if the GEP is
+ // NUSW.
+ uint32_t ScaleFlags = PtrAddFlags & MachineInstr::MIFlag::NoUWrap;
+ if (PtrAddFlags & MachineInstr::MIFlag::NoUSWrap)
+ ScaleFlags |= MachineInstr::MIFlag::NoSWrap;
+
GepOffsetReg =
- MIRBuilder.buildMul(OffsetTy, IdxReg, ElementSizeMIB).getReg(0);
- } else
+ MIRBuilder.buildMul(OffsetTy, IdxReg, ElementSizeMIB, ScaleFlags)
+ .getReg(0);
+ } else {
GepOffsetReg = IdxReg;
+ }
- BaseReg = MIRBuilder.buildPtrAdd(PtrTy, BaseReg, GepOffsetReg).getReg(0);
+ BaseReg =
+ MIRBuilder.buildPtrAdd(PtrTy, BaseReg, GepOffsetReg, PtrAddFlags)
+ .getReg(0);
}
}
@@ -1681,11 +1704,8 @@ bool IRTranslator::translateGetElementPtr(const User &U,
auto OffsetMIB =
MIRBuilder.buildConstant(OffsetTy, Offset);
- if (Offset >= 0 && cast<GEPOperator>(U).isInBounds())
- Flags |= MachineInstr::MIFlag::NoUWrap;
-
MIRBuilder.buildPtrAdd(getOrCreateVReg(U), BaseReg, OffsetMIB.getReg(0),
- Flags);
+ PtrAddFlagsWithConst(Offset));
return true;
}
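The flag propagation above composes as follows; here is a minimal standalone sketch of the rule, using stand-in flag constants that only mirror MachineInstr::MIFlag (all names below are illustrative, not LLVM's):

```cpp
#include <cassert>
#include <cstdint>

// Stand-in flag bits; these only mirror MachineInstr::MIFlag for illustration.
enum : uint32_t { NoUWrap = 1, NoSWrap = 2, NoUSWrap = 4 };

// A nusw ptradd with a nonnegative constant offset cannot wrap unsigned.
uint32_t ptrAddFlagsWithConst(uint32_t PtrAddFlags, int64_t Offset) {
  if (Offset >= 0 && (PtrAddFlags & NoUSWrap))
    return PtrAddFlags | NoUWrap;
  return PtrAddFlags;
}

// The index scaling is nuw if the GEP is nuw, and nsw if the GEP is nusw.
uint32_t scaleFlags(uint32_t PtrAddFlags) {
  uint32_t Flags = PtrAddFlags & NoUWrap;
  if (PtrAddFlags & NoUSWrap)
    Flags |= NoSWrap;
  return Flags;
}

int main() {
  uint32_t Gep = NoUSWrap; // e.g. an inbounds GEP, which implies nusw
  assert(ptrAddFlagsWithConst(Gep, 16) & NoUWrap);    // offset >= 0 -> nuw
  assert(!(ptrAddFlagsWithConst(Gep, -8) & NoUWrap)); // negative -> no nuw
  assert(scaleFlags(Gep) & NoSWrap);                  // nusw GEP -> nsw mul
}
```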
@@ -2189,8 +2209,8 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
unsigned Op = ID == Intrinsic::lifetime_start ? TargetOpcode::LIFETIME_START
: TargetOpcode::LIFETIME_END;
- const AllocaInst *AI = cast<AllocaInst>(CI.getArgOperand(1));
- if (!AI->isStaticAlloca())
+ const AllocaInst *AI = dyn_cast<AllocaInst>(CI.getArgOperand(1));
+ if (!AI || !AI->isStaticAlloca())
return true;
MIRBuilder.buildInstr(Op).addFrameIndex(getOrCreateFrameIndex(*AI));
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index f48bfc0..8955dd0 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1401,6 +1401,21 @@ bool llvm::isBuildVectorConstantSplat(const Register Reg,
return false;
}
+bool llvm::isBuildVectorConstantSplat(const Register Reg,
+ const MachineRegisterInfo &MRI,
+ APInt SplatValue, bool AllowUndef) {
+ if (auto SplatValAndReg = getAnyConstantSplat(Reg, MRI, AllowUndef)) {
+ if (SplatValAndReg->Value.getBitWidth() < SplatValue.getBitWidth())
+ return APInt::isSameValue(
+ SplatValAndReg->Value.sext(SplatValue.getBitWidth()), SplatValue);
+ return APInt::isSameValue(
+ SplatValAndReg->Value,
+ SplatValue.sext(SplatValAndReg->Value.getBitWidth()));
+ }
+
+ return false;
+}
+
bool llvm::isBuildVectorConstantSplat(const MachineInstr &MI,
const MachineRegisterInfo &MRI,
int64_t SplatValue, bool AllowUndef) {
@@ -1408,6 +1423,13 @@ bool llvm::isBuildVectorConstantSplat(const MachineInstr &MI,
AllowUndef);
}
+bool llvm::isBuildVectorConstantSplat(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ APInt SplatValue, bool AllowUndef) {
+ return isBuildVectorConstantSplat(MI.getOperand(0).getReg(), MRI, SplatValue,
+ AllowUndef);
+}
+
std::optional<APInt>
llvm::getIConstantSplatVal(const Register Reg, const MachineRegisterInfo &MRI) {
if (auto SplatValAndReg =
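The sign-extension rule in the new APInt overload above can be exercised directly; a minimal sketch, assuming LLVM's ADT headers are available to compile against:

```cpp
#include "llvm/ADT/APInt.h"
#include <cassert>
using llvm::APInt;

int main() {
  // An all-ones i8 splat element matches a 32-bit SplatValue of -1: the
  // narrower side is sign-extended to the wider width before comparing.
  APInt Elt(8, 0xFF);          // i8 -1
  APInt Splat(32, 0xFFFFFFFF); // i32 -1
  assert(APInt::isSameValue(Elt.sext(32), Splat));

  // When the element is wider, SplatValue is the side that gets extended.
  APInt WideElt(64, ~0ULL);    // i64 -1
  assert(APInt::isSameValue(WideElt, Splat.sext(64)));
}
```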
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 7ede564..514f2f0 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -306,12 +306,7 @@ class RegisterCoalescer : private LiveRangeEdit::Delegate {
/// number if it is not zero. If DstReg is a physical register and the
/// existing subregister number of the def / use being updated is not zero,
/// make sure to set it to the correct physical subregister.
- ///
- /// If \p SubregToRegSrcInst is not empty, we are coalescing a
- /// `DstReg = SUBREG_TO_REG SrcReg`, which should introduce an
- /// implicit-def of DstReg on instructions that define SrcReg.
- void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx,
- ArrayRef<MachineInstr *> SubregToRegSrcInst = {});
+ void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx);
/// If the given machine operand reads only undefined lanes add an undef
/// flag.
@@ -1448,7 +1443,6 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
// CopyMI may have implicit operands, save them so that we can transfer them
// over to the newly materialized instruction after CopyMI is removed.
- LaneBitmask NewMIImplicitOpsMask;
SmallVector<MachineOperand, 4> ImplicitOps;
ImplicitOps.reserve(CopyMI->getNumOperands() -
CopyMI->getDesc().getNumOperands());
@@ -1463,9 +1457,6 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
(MO.getSubReg() == 0 && MO.getReg() == DstOperand.getReg())) &&
"unexpected implicit virtual register def");
ImplicitOps.push_back(MO);
- if (MO.isDef() && MO.getReg().isVirtual() &&
- MRI->shouldTrackSubRegLiveness(DstReg))
- NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg());
}
}
@@ -1508,11 +1499,14 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
} else {
assert(MO.getReg() == NewMI.getOperand(0).getReg());
- // If lanemasks need to be tracked, compile the lanemask of the NewMI
- // implicit def operands to avoid subranges for the super-regs from
- // being removed by code later on in this function.
- if (MRI->shouldTrackSubRegLiveness(MO.getReg()))
- NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg());
+ // We're only expecting another def of the main output, so the range
+ // should get updated with the regular output range.
+ //
+ // FIXME: The range updating below probably needs updating to look at
+ // the super register if subranges are tracked.
+ assert(!MRI->shouldTrackSubRegLiveness(DstReg) &&
+ "subrange update for implicit-def of super register may not be "
+ "properly handled");
}
}
}
@@ -1612,8 +1606,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
VNInfo::Allocator &Alloc = LIS->getVNInfoAllocator();
for (LiveInterval::SubRange &SR : DstInt.subranges()) {
- if ((SR.LaneMask & DstMask).none() &&
- (SR.LaneMask & NewMIImplicitOpsMask).none()) {
+ if ((SR.LaneMask & DstMask).none()) {
LLVM_DEBUG(dbgs()
<< "Removing undefined SubRange "
<< PrintLaneMask(SR.LaneMask) << " : " << SR << "\n");
@@ -1631,11 +1624,11 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
UpdatedSubRanges = true;
} else {
// We know that this lane is defined by this instruction,
- // but at this point it may be empty because it is not used by
- // anything. This happens when updateRegDefUses adds the missing
- // lanes. Assign that lane a dead def so that the interferences
- // are properly modeled.
- if (SR.empty())
+ // but at this point it might not be live because it was not defined
+ // by the original instruction. This happens when the
+ // rematerialization widens the defined register. Assign that lane a
+ // dead def so that the interferences are properly modeled.
+ if (!SR.liveAt(DefIndex))
SR.createDeadDef(DefIndex, Alloc);
}
}
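An illustrative picture of the widening case the new liveAt() check handles (register classes and opcodes below are hypothetical x86-flavoured examples, not taken from this patch):

```cpp
// Before coalescing:
//   %1:gr32 = MOV32r0                ; defines only the low 32 bits
//   %2:gr64 = SUBREG_TO_REG 0, %1, %subreg.sub_32bit
// After rematerializing %1's def directly into %2, the def is widened:
//   undef %2.sub_32bit:gr64 = MOV32r0
// The high lanes of %2 are now defined at this point even though the
// original instruction never defined them, so any subrange that is not
// live at DefIndex gets a dead def there to keep interference modelling
// correct.
```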
@@ -1877,14 +1870,11 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
}
}
-void RegisterCoalescer::updateRegDefsUses(
- Register SrcReg, Register DstReg, unsigned SubIdx,
- ArrayRef<MachineInstr *> SubregToRegSrcInsts) {
+void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
+ unsigned SubIdx) {
bool DstIsPhys = DstReg.isPhysical();
LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);
- // Coalescing a COPY may expose reads of 'undef' subregisters.
- // If so, then explicitly propagate 'undef' to those operands.
if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) {
for (MachineOperand &MO : MRI->reg_operands(DstReg)) {
if (MO.isUndef())
@@ -1901,15 +1891,6 @@ void RegisterCoalescer::updateRegDefsUses(
}
}
- // If DstInt already has a subrange for the unused lanes, then we shouldn't
- // create duplicate subranges when we update the interval for unused lanes.
- LaneBitmask DstIntLaneMask;
- if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) {
- for (LiveInterval::SubRange &SR : DstInt->subranges())
- DstIntLaneMask |= SR.LaneMask;
- }
-
- // Go through all instructions to replace uses of 'SrcReg' by 'DstReg'.
SmallPtrSet<MachineInstr *, 8> Visited;
for (MachineRegisterInfo::reg_instr_iterator I = MRI->reg_instr_begin(SrcReg),
E = MRI->reg_instr_end();
@@ -1933,80 +1914,6 @@ void RegisterCoalescer::updateRegDefsUses(
if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr())
Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));
- bool RequiresImplicitRedef = false;
- if (!SubregToRegSrcInsts.empty()) {
- // We can only add an implicit-def and undef if the sub registers match,
- // e.g.
- // %0:gr32 = INSTX
- // %0.sub8:gr32 = INSTY // top 24 bits of %0 still defined
- // %1:gr64 = SUBREG_TO_REG 0, %0, %subreg.sub32
- //
- // This cannot be transformed into:
- // %1.sub32:gr64 = INSTX
- // undef %1.sub8:gr64 = INSTY , implicit-def %1
- //
- // Because that would thrash the top 24 bits of %1.sub32.
- if (is_contained(SubregToRegSrcInsts, UseMI) &&
- all_of(UseMI->defs(),
- [&SubIdx, &SrcReg](const MachineOperand &MO) -> bool {
- if (MO.getReg() != SrcReg || !MO.getSubReg() || MO.isUndef())
- return true;
- return SubIdx == MO.getSubReg();
- })) {
- // Add implicit-def of super-register to express that the whole
- // register is defined by the instruction.
- MachineInstrBuilder MIB(*MF, UseMI);
- MIB.addReg(DstReg, RegState::ImplicitDefine);
- RequiresImplicitRedef = true;
- }
-
- // If the coalesed instruction doesn't fully define the register, we need
- // to preserve the original super register liveness for SUBREG_TO_REG.
- //
- // We pretended SUBREG_TO_REG was a regular copy for coalescing purposes,
- // but it introduces liveness for other subregisters. Downstream users may
- // have been relying on those bits, so we need to ensure their liveness is
- // captured with a def of other lanes.
- if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) {
- // First check if there is sufficient granularity in terms of subranges.
- LaneBitmask DstMask = MRI->getMaxLaneMaskForVReg(DstInt->reg());
- LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx);
- LaneBitmask UnusedLanes = DstMask & ~UsedLanes;
- if ((UnusedLanes & ~DstIntLaneMask).any()) {
- BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
- DstInt->createSubRangeFrom(Allocator, UnusedLanes, *DstInt);
- DstIntLaneMask |= UnusedLanes;
- }
-
- // After duplicating the live ranges for the low/hi bits, we
- // need to update the subranges of the DstReg interval such that
- // for a case like this:
- //
- // entry:
- // 16B %1:gpr32 = INSTRUCTION (<=> UseMI)
- // :
- // if.then:
- // 32B %1:gpr32 = MOVIMM32 ..
- // 48B %0:gpr64 = SUBREG_TO_REG 0, %1, sub32
- //
- // Only the MOVIMM32 require a def of the top lanes and any intervals
- // for the top 32-bits of the def at 16B should be removed.
- for (LiveInterval::SubRange &SR : DstInt->subranges()) {
- if (!Writes || RequiresImplicitRedef ||
- (SR.LaneMask & UnusedLanes).none())
- continue;
-
- assert((SR.LaneMask & UnusedLanes) == SR.LaneMask &&
- "Unexpected lanemask. Subrange needs finer granularity");
-
- SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI).getRegSlot(false);
- auto SegmentI = SR.find(UseIdx);
- if (SegmentI != SR.end())
- SR.removeSegment(SegmentI, true);
- }
- }
- }
-
// Replace SrcReg with DstReg in all UseMI operands.
for (unsigned Op : Ops) {
MachineOperand &MO = UseMI->getOperand(Op);
@@ -2015,7 +1922,7 @@ void RegisterCoalescer::updateRegDefsUses(
// turn a full def into a read-modify-write sub-register def and vice
// versa.
if (SubIdx && MO.isDef())
- MO.setIsUndef(!Reads || RequiresImplicitRedef);
+ MO.setIsUndef(!Reads);
// A subreg use of a partially undef (super) register may be a complete
// undef use now and then has to be marked that way.
@@ -2118,30 +2025,6 @@ void RegisterCoalescer::setUndefOnPrunedSubRegUses(LiveInterval &LI,
LIS->shrinkToUses(&LI);
}
-/// For a given use of value \p Idx, it returns the def in the current block,
-/// or otherwise all possible defs in preceding blocks.
-static bool FindDefInBlock(SmallPtrSetImpl<MachineBasicBlock *> &VisitedBlocks,
- SmallVector<MachineInstr *> &Instrs,
- LiveIntervals *LIS, LiveInterval &SrcInt,
- MachineBasicBlock *MBB, VNInfo *Idx) {
- if (!Idx->isPHIDef()) {
- MachineInstr *Def = LIS->getInstructionFromIndex(Idx->def);
- assert(Def && "Unable to find a def for SUBREG_TO_REG source operand");
- Instrs.push_back(Def);
- return true;
- }
-
- bool Any = false;
- if (VisitedBlocks.count(MBB))
- return false;
- VisitedBlocks.insert(MBB);
- for (MachineBasicBlock *Pred : MBB->predecessors()) {
- Any |= FindDefInBlock(VisitedBlocks, Instrs, LIS, SrcInt, Pred,
- SrcInt.getVNInfoBefore(LIS->getMBBEndIdx(Pred)));
- }
- return Any;
-}
-
bool RegisterCoalescer::joinCopy(
MachineInstr *CopyMI, bool &Again,
SmallPtrSetImpl<MachineInstr *> &CurrentErasedInstrs) {
@@ -2273,35 +2156,6 @@ bool RegisterCoalescer::joinCopy(
});
}
- SmallVector<MachineInstr *> SubregToRegSrcInsts;
- if (CopyMI->isSubregToReg()) {
- // For the case where the copy instruction is a SUBREG_TO_REG, e.g.
- //
- // %0:gpr32 = movimm32 ..
- // %1:gpr64 = SUBREG_TO_REG 0, %0, sub32
- // ...
- // %0:gpr32 = COPY <something>
- //
- // After joining liveranges, the original `movimm32` will need an
- // implicit-def to make it explicit that the entire register is written,
- // i.e.
- //
- // undef %0.sub32:gpr64 = movimm32 ..., implicit-def %0
- // ...
- // undef %0.sub32:gpr64 = COPY <something> // Note that this does not
- // // require an implicit-def,
- // // because it has nothing to
- // // do with the SUBREG_TO_REG.
- LiveInterval &SrcInt =
- LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
- SlotIndex SubregToRegSlotIdx = LIS->getInstructionIndex(*CopyMI);
- SmallPtrSet<MachineBasicBlock *, 8> VisitedBlocks;
- if (!FindDefInBlock(VisitedBlocks, SubregToRegSrcInsts, LIS, SrcInt,
- CopyMI->getParent(),
- SrcInt.Query(SubregToRegSlotIdx).valueIn()))
- llvm_unreachable("SUBREG_TO_REG src requires a def");
- }
-
ShrinkMask = LaneBitmask::getNone();
ShrinkMainRange = false;
@@ -2371,12 +2225,9 @@ bool RegisterCoalescer::joinCopy(
// Rewrite all SrcReg operands to DstReg.
// Also update DstReg operands to include DstIdx if it is set.
- if (CP.getDstIdx()) {
- assert(SubregToRegSrcInsts.empty() && "can this happen?");
+ if (CP.getDstIdx())
updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
- }
- updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx(),
- SubregToRegSrcInsts);
+ updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
// Shrink subregister ranges if necessary.
if (ShrinkMask.any()) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 11e869a..d70e96938 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4075,18 +4075,11 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
unsigned BitWidth = VT.getScalarSizeInBits();
SDLoc DL(N);
- auto PeekThroughFreeze = [](SDValue N) {
- if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
- return N->getOperand(0);
- return N;
- };
-
if (SDValue V = foldSubCtlzNot<EmptyMatchContext>(N, DAG))
return V;
// fold (sub x, x) -> 0
- // FIXME: Refactor this and xor and other similar operations together.
- if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1))
+ if (N0 == N1)
return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
// fold (sub c1, c2) -> c3
@@ -6499,19 +6492,21 @@ static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
// It is safe to use FMINNUM_IEEE/FMAXNUM_IEEE if all the operands
// are non NaN values.
if (((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::OR)) ||
- ((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND)))
+ ((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND))) {
return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
isFMAXNUMFMINNUM_IEEE
? ISD::FMINNUM_IEEE
: ISD::DELETED_NODE;
- else if (((CC == ISD::SETGT || CC == ISD::SETGE) &&
- (OrAndOpcode == ISD::OR)) ||
- ((CC == ISD::SETLT || CC == ISD::SETLE) &&
- (OrAndOpcode == ISD::AND)))
+ }
+
+ if (((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::OR)) ||
+ ((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::AND))) {
return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
isFMAXNUMFMINNUM_IEEE
? ISD::FMAXNUM_IEEE
: ISD::DELETED_NODE;
+ }
+
// Both FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle quiet
// NaNs in the same way. But, FMINNUM/FMAXNUM and FMINNUM_IEEE/
// FMAXNUM_IEEE handle signaling NaNs differently. If we cannot prove
@@ -6521,24 +6516,24 @@ static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
// we can prove that we do not have any sNaNs, then we can do the
// optimization using FMINNUM_IEEE/FMAXNUM_IEEE for the following
// cases.
- else if (((CC == ISD::SETOLT || CC == ISD::SETOLE) &&
- (OrAndOpcode == ISD::OR)) ||
- ((CC == ISD::SETUGT || CC == ISD::SETUGE) &&
- (OrAndOpcode == ISD::AND)))
+ if (((CC == ISD::SETOLT || CC == ISD::SETOLE) && (OrAndOpcode == ISD::OR)) ||
+ ((CC == ISD::SETUGT || CC == ISD::SETUGE) && (OrAndOpcode == ISD::AND))) {
return isFMAXNUMFMINNUM ? ISD::FMINNUM
- : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
- isFMAXNUMFMINNUM_IEEE
- ? ISD::FMINNUM_IEEE
- : ISD::DELETED_NODE;
- else if (((CC == ISD::SETOGT || CC == ISD::SETOGE) &&
- (OrAndOpcode == ISD::OR)) ||
- ((CC == ISD::SETULT || CC == ISD::SETULE) &&
- (OrAndOpcode == ISD::AND)))
+ : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
+ isFMAXNUMFMINNUM_IEEE
+ ? ISD::FMINNUM_IEEE
+ : ISD::DELETED_NODE;
+ }
+
+ if (((CC == ISD::SETOGT || CC == ISD::SETOGE) && (OrAndOpcode == ISD::OR)) ||
+ ((CC == ISD::SETULT || CC == ISD::SETULE) && (OrAndOpcode == ISD::AND))) {
return isFMAXNUMFMINNUM ? ISD::FMAXNUM
- : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
- isFMAXNUMFMINNUM_IEEE
- ? ISD::FMAXNUM_IEEE
- : ISD::DELETED_NODE;
+ : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
+ isFMAXNUMFMINNUM_IEEE
+ ? ISD::FMAXNUM_IEEE
+ : ISD::DELETED_NODE;
+ }
+
return ISD::DELETED_NODE;
}
@@ -13184,14 +13179,14 @@ static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal,
// select Cond, -1, x → or Cond, x
if (IsTAllOne) {
- SDValue X = DAG.getBitcast(CondVT, FVal);
+ SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(FVal));
SDValue Or = DAG.getNode(ISD::OR, DL, CondVT, Cond, X);
return DAG.getBitcast(VT, Or);
}
// select Cond, x, 0 → and Cond, x
if (IsFAllZero) {
- SDValue X = DAG.getBitcast(CondVT, TVal);
+ SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(TVal));
SDValue And = DAG.getNode(ISD::AND, DL, CondVT, Cond, X);
return DAG.getBitcast(VT, And);
}
@@ -13199,7 +13194,7 @@ static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal,
// select Cond, 0, x -> and not(Cond), x
if (IsTAllZero &&
(isBitwiseNot(peekThroughBitcasts(Cond)) || TLI.hasAndNot(Cond))) {
- SDValue X = DAG.getBitcast(CondVT, FVal);
+ SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(FVal));
SDValue And =
DAG.getNode(ISD::AND, DL, CondVT, DAG.getNOT(DL, Cond, CondVT), X);
return DAG.getBitcast(VT, And);
@@ -16754,6 +16749,17 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
return N0;
+ // If we have frozen and unfrozen users of N0, update so everything uses N.
+ if (!N0.isUndef() && !N0.hasOneUse()) {
+ SDValue FrozenN0(N, 0);
+ DAG.ReplaceAllUsesOfValueWith(N0, FrozenN0);
+ // ReplaceAllUsesOfValueWith will have also updated the use in N, thus
+ // creating a cycle in the DAG. Let's undo that by mutating the freeze.
+ assert(N->getOperand(0) == FrozenN0 && "Expected cycle in DAG");
+ DAG.UpdateNodeOperands(N, N0);
+ return FrozenN0;
+ }
+
// We currently avoid folding freeze over SRA/SRL, due to the problems seen
// with (freeze (assert ext)) blocking simplifications of SRA/SRL. See for
// example https://reviews.llvm.org/D136529#4120959.
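The use-rewriting above can be pictured as a before/after DAG fragment (node names illustrative):

```cpp
// Before: t0 has both frozen and unfrozen users.
//   t0 = add x, y
//   t1 = freeze t0
//   a  = sub t0, c        // unfrozen user
//   b  = sub t1, c        // frozen user
//
// ReplaceAllUsesOfValueWith(t0, t1) rewrites every use of t0, including
// the freeze's own operand (t1 = freeze t1, a self-cycle); the following
// UpdateNodeOperands(N, N0) restores the operand, leaving:
//   t1 = freeze t0
//   a  = sub t1, c
//   b  = sub t1, c
```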
@@ -16807,8 +16813,7 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
SmallSet<SDValue, 8> MaybePoisonOperands;
SmallVector<unsigned, 8> MaybePoisonOperandNumbers;
for (auto [OpNo, Op] : enumerate(N0->ops())) {
- if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false,
- /*Depth*/ 1))
+ if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly=*/false))
continue;
bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op).second;
@@ -22534,6 +22539,56 @@ SDValue DAGCombiner::visitATOMIC_STORE(SDNode *N) {
return SDValue();
}
+static SDValue foldToMaskedStore(StoreSDNode *Store, SelectionDAG &DAG,
+ const SDLoc &Dl) {
+ if (!Store->isSimple() || !ISD::isNormalStore(Store))
+ return SDValue();
+
+ SDValue StoredVal = Store->getValue();
+ SDValue StorePtr = Store->getBasePtr();
+ SDValue StoreOffset = Store->getOffset();
+ EVT VT = Store->getMemoryVT();
+ unsigned AddrSpace = Store->getAddressSpace();
+ Align Alignment = Store->getAlign();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ if (!TLI.isOperationLegalOrCustom(ISD::MSTORE, VT) ||
+ !TLI.allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment))
+ return SDValue();
+
+ SDValue Mask, OtherVec, LoadCh;
+ unsigned LoadPos;
+ if (sd_match(StoredVal,
+ m_VSelect(m_Value(Mask), m_Value(OtherVec),
+ m_Load(m_Value(LoadCh), m_Specific(StorePtr),
+ m_Specific(StoreOffset))))) {
+ LoadPos = 2;
+ } else if (sd_match(StoredVal,
+ m_VSelect(m_Value(Mask),
+ m_Load(m_Value(LoadCh), m_Specific(StorePtr),
+ m_Specific(StoreOffset)),
+ m_Value(OtherVec)))) {
+ LoadPos = 1;
+ } else {
+ return SDValue();
+ }
+
+ auto *Load = cast<LoadSDNode>(StoredVal.getOperand(LoadPos));
+ if (!Load->isSimple() || !ISD::isNormalLoad(Load) ||
+ Load->getAddressSpace() != AddrSpace)
+ return SDValue();
+
+ if (!Store->getChain().reachesChainWithoutSideEffects(LoadCh))
+ return SDValue();
+
+ if (LoadPos == 1)
+ Mask = DAG.getNOT(Dl, Mask, Mask.getValueType());
+
+ return DAG.getMaskedStore(Store->getChain(), Dl, OtherVec, StorePtr,
+ StoreOffset, Mask, VT, Store->getMemOperand(),
+ Store->getAddressingMode());
+}
+
SDValue DAGCombiner::visitSTORE(SDNode *N) {
StoreSDNode *ST = cast<StoreSDNode>(N);
SDValue Chain = ST->getChain();
@@ -22768,6 +22823,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (SDValue NewSt = splitMergedValStore(ST))
return NewSt;
+ if (SDValue MaskedStore = foldToMaskedStore(ST, DAG, SDLoc(N)))
+ return MaskedStore;
+
return ReduceLoadOpStoreWidth(N);
}
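For reference, the rewrite foldToMaskedStore performs, sketched in DAG pseudo-notation (illustrative, not actual node syntax):

```cpp
// store (vselect Mask, X, (load Ptr)), Ptr
//   --> masked_store X, Ptr, Mask      // X lands where Mask is 1; the
//                                      // loaded lanes are stored back
//                                      // unchanged, so masking them off
//                                      // is equivalent.
// store (vselect Mask, (load Ptr), X), Ptr
//   --> masked_store X, Ptr, ~Mask     // X is the "false" arm, so the
//                                      // mask is inverted first.
// Guards: MSTORE legal/custom for VT, simple normal load/store in the
// same address space, and the load reachable from the store's chain
// without intervening side effects.
```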
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 583a85a..a5bd97a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -2217,8 +2217,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) {
switch (getTypeAction(InVT)) {
case TargetLowering::TypePromoteInteger: {
- // TODO: Handle big endian
- if (OutVT.isVector() && DAG.getDataLayout().isLittleEndian()) {
+ // TODO: Handle big endian & vector input type.
+ if (OutVT.isVector() && !InVT.isVector() &&
+ DAG.getDataLayout().isLittleEndian()) {
EVT EltVT = OutVT.getVectorElementType();
TypeSize EltSize = EltVT.getSizeInBits();
TypeSize NInSize = NInVT.getSizeInBits();
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 2e13b18..63544e6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -971,6 +971,7 @@ private:
void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi);
void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_VP_LOAD_FF(VPLoadFFSDNode *LD, SDValue &Lo, SDValue &Hi);
void SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, SDValue &Lo,
SDValue &Hi);
void SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi);
@@ -1075,6 +1076,7 @@ private:
SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
SDValue WidenVecRes_LOAD(SDNode* N);
SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N);
+ SDValue WidenVecRes_VP_LOAD_FF(VPLoadFFSDNode *N);
SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N);
SDValue WidenVecRes_VECTOR_COMPRESS(SDNode *N);
SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 1661814..bc2dbfb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1152,6 +1152,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::VP_LOAD:
SplitVecRes_VP_LOAD(cast<VPLoadSDNode>(N), Lo, Hi);
break;
+ case ISD::VP_LOAD_FF:
+ SplitVecRes_VP_LOAD_FF(cast<VPLoadFFSDNode>(N), Lo, Hi);
+ break;
case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
SplitVecRes_VP_STRIDED_LOAD(cast<VPStridedLoadSDNode>(N), Lo, Hi);
break;
@@ -2227,6 +2230,45 @@ void DAGTypeLegalizer::SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo,
ReplaceValueWith(SDValue(LD, 1), Ch);
}
+void DAGTypeLegalizer::SplitVecRes_VP_LOAD_FF(VPLoadFFSDNode *LD, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(LD);
+ auto [LoVT, HiVT] = DAG.GetSplitDestVTs(LD->getValueType(0));
+
+ SDValue Ch = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ Align Alignment = LD->getBaseAlign();
+ SDValue Mask = LD->getMask();
+ SDValue EVL = LD->getVectorLength();
+
+ // Split Mask operand
+ SDValue MaskLo, MaskHi;
+ if (Mask.getOpcode() == ISD::SETCC) {
+ SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
+ } else {
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+ }
+
+ // Split EVL operand
+ auto [EVLLo, EVLHi] = DAG.SplitEVL(EVL, LD->getValueType(0), dl);
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ LD->getPointerInfo(), MachineMemOperand::MOLoad,
+ LocationSize::beforeOrAfterPointer(), Alignment, LD->getAAInfo(),
+ LD->getRanges());
+
+ Lo = DAG.getLoadFFVP(LoVT, dl, Ch, Ptr, MaskLo, EVLLo, MMO);
+
+ // Fill the upper half with poison.
+ Hi = DAG.getUNDEF(HiVT);
+
+ ReplaceValueWith(SDValue(LD, 1), Lo.getValue(1));
+ ReplaceValueWith(SDValue(LD, 2), Lo.getValue(2));
+}
+
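A plausible picture of the split, given fault-only-first semantics where only elements below the returned EVL are guaranteed to be defined (illustrative notation):

```cpp
// Splitting vp.load.ff of <8 x i32>:
//   lo, newEVL, ch = vp.load.ff ptr, mask.lo, evl.lo   ; <4 x i32>
//   hi             = poison                            ; <4 x i32>
// The EVL result is now taken from the low-half load, so it never exceeds
// the low half's length; elements at or above it are undefined anyway,
// which is why the upper half can legitimately be left as poison.
```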
void DAGTypeLegalizer::SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD,
SDValue &Lo, SDValue &Hi) {
assert(SLD->isUnindexed() &&
@@ -4707,6 +4749,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::VP_LOAD:
Res = WidenVecRes_VP_LOAD(cast<VPLoadSDNode>(N));
break;
+ case ISD::VP_LOAD_FF:
+ Res = WidenVecRes_VP_LOAD_FF(cast<VPLoadFFSDNode>(N));
+ break;
case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
Res = WidenVecRes_VP_STRIDED_LOAD(cast<VPStridedLoadSDNode>(N));
break;
@@ -6163,6 +6208,29 @@ SDValue DAGTypeLegalizer::WidenVecRes_VP_LOAD(VPLoadSDNode *N) {
return Res;
}
+SDValue DAGTypeLegalizer::WidenVecRes_VP_LOAD_FF(VPLoadFFSDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Mask = N->getMask();
+ SDValue EVL = N->getVectorLength();
+ SDLoc dl(N);
+
+ // The mask should be widened as well
+ assert(getTypeAction(Mask.getValueType()) ==
+ TargetLowering::TypeWidenVector &&
+ "Unable to widen binary VP op");
+ Mask = GetWidenedVector(Mask);
+ assert(Mask.getValueType().getVectorElementCount() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Mask.getValueType())
+ .getVectorElementCount() &&
+ "Unable to widen vector load");
+
+ SDValue Res = DAG.getLoadFFVP(WidenVT, dl, N->getChain(), N->getBasePtr(),
+ Mask, EVL, N->getMemOperand());
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ ReplaceValueWith(SDValue(N, 2), Res.getValue(2));
+ return Res;
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N) {
SDLoc DL(N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index f41b6eb..71a175d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -837,6 +837,14 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(ELD->getMemOperand()->getFlags());
break;
}
+ case ISD::VP_LOAD_FF: {
+ const auto *LD = cast<VPLoadFFSDNode>(N);
+ ID.AddInteger(LD->getMemoryVT().getRawBits());
+ ID.AddInteger(LD->getRawSubclassData());
+ ID.AddInteger(LD->getPointerInfo().getAddrSpace());
+ ID.AddInteger(LD->getMemOperand()->getFlags());
+ break;
+ }
case ISD::VP_STORE: {
const VPStoreSDNode *EST = cast<VPStoreSDNode>(N);
ID.AddInteger(EST->getMemoryVT().getRawBits());
@@ -6351,8 +6359,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
break;
case ISD::FREEZE:
assert(VT == N1.getValueType() && "Unexpected VT!");
- if (isGuaranteedNotToBeUndefOrPoison(N1, /*PoisonOnly*/ false,
- /*Depth*/ 1))
+ if (isGuaranteedNotToBeUndefOrPoison(N1, /*PoisonOnly=*/false))
return N1;
break;
case ISD::TokenFactor:
@@ -10434,6 +10441,34 @@ SDValue SelectionDAG::getMaskedHistogram(SDVTList VTs, EVT MemVT,
return V;
}
+SDValue SelectionDAG::getLoadFFVP(EVT VT, const SDLoc &DL, SDValue Chain,
+ SDValue Ptr, SDValue Mask, SDValue EVL,
+ MachineMemOperand *MMO) {
+ SDVTList VTs = getVTList(VT, EVL.getValueType(), MVT::Other);
+ SDValue Ops[] = {Chain, Ptr, Mask, EVL};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::VP_LOAD_FF, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<VPLoadFFSDNode>(DL.getIROrder(),
+ VTs, VT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
+ cast<VPLoadFFSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N = newSDNode<VPLoadFFSDNode>(DL.getIROrder(), DL.getDebugLoc(), VTs,
+ VT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
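getLoadFFVP follows SelectionDAG's usual CSE discipline: profile the would-be node into a FoldingSetNodeID, reuse an existing structurally identical node on a hit, and allocate plus insert on a miss. A minimal sketch of that pattern with LLVM's FoldingSet (simplified and illustrative; assumes LLVM's ADT headers):

```cpp
#include "llvm/ADT/FoldingSet.h"
using namespace llvm;

struct MyNode : FoldingSetNode {
  unsigned Opcode, Value;
  MyNode(unsigned Op, unsigned V) : Opcode(Op), Value(V) {}
  void Profile(FoldingSetNodeID &ID) const {
    ID.AddInteger(Opcode); // hash exactly what makes the node unique
    ID.AddInteger(Value);
  }
};

FoldingSet<MyNode> CSEMap;

MyNode *getNode(unsigned Op, unsigned V) {
  FoldingSetNodeID ID;
  ID.AddInteger(Op);
  ID.AddInteger(V);
  void *IP = nullptr;
  if (MyNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
    return E; // structurally identical node already exists: reuse it
  MyNode *N = new MyNode(Op, V); // SelectionDAG uses an allocator instead
  CSEMap.InsertNode(N, IP);      // insert at the position the lookup found
  return N;
}
```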
SDValue SelectionDAG::getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr,
EVT MemVT, MachineMemOperand *MMO) {
assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 306e068..d0815e9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -7598,7 +7598,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
if (TM.getOptLevel() == CodeGenOptLevel::None)
return;
- const AllocaInst *LifetimeObject = cast<AllocaInst>(I.getArgOperand(1));
+ const AllocaInst *LifetimeObject = dyn_cast<AllocaInst>(I.getArgOperand(1));
+ if (!LifetimeObject)
+ return;
// First check that the Alloca is static, otherwise it won't have a
// valid frame index.
@@ -8440,6 +8442,34 @@ void SelectionDAGBuilder::visitVPLoad(
setValue(&VPIntrin, LD);
}
+void SelectionDAGBuilder::visitVPLoadFF(
+ const VPIntrinsic &VPIntrin, EVT VT, EVT EVLVT,
+ const SmallVectorImpl<SDValue> &OpValues) {
+ assert(OpValues.size() == 3 && "Unexpected number of operands");
+ SDLoc DL = getCurSDLoc();
+ Value *PtrOperand = VPIntrin.getArgOperand(0);
+ MaybeAlign Alignment = VPIntrin.getPointerAlignment();
+ AAMDNodes AAInfo = VPIntrin.getAAMetadata();
+ const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range);
+ SDValue LD;
+ // Do not serialize variable-length loads of constant memory with
+ // anything.
+ if (!Alignment)
+ Alignment = DAG.getEVTAlign(VT);
+ MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
+ bool AddToChain = !BatchAA || !BatchAA->pointsToConstantMemory(ML);
+ SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
+ LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo, Ranges);
+ LD = DAG.getLoadFFVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2],
+ MMO);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, EVLVT, LD.getValue(1));
+ if (AddToChain)
+ PendingLoads.push_back(LD.getValue(2));
+ setValue(&VPIntrin, DAG.getMergeValues({LD.getValue(0), Trunc}, DL));
+}
+
void SelectionDAGBuilder::visitVPGather(
const VPIntrinsic &VPIntrin, EVT VT,
const SmallVectorImpl<SDValue> &OpValues) {
@@ -8673,6 +8703,9 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
case ISD::VP_LOAD:
visitVPLoad(VPIntrin, ValueVTs[0], OpValues);
break;
+ case ISD::VP_LOAD_FF:
+ visitVPLoadFF(VPIntrin, ValueVTs[0], ValueVTs[1], OpValues);
+ break;
case ISD::VP_GATHER:
visitVPGather(VPIntrin, ValueVTs[0], OpValues);
break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 1c27807..c251755 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -631,6 +631,8 @@ private:
void visitVectorExtractLastActive(const CallInst &I, unsigned Intrinsic);
void visitVPLoad(const VPIntrinsic &VPIntrin, EVT VT,
const SmallVectorImpl<SDValue> &OpValues);
+ void visitVPLoadFF(const VPIntrinsic &VPIntrin, EVT VT, EVT EVLVT,
+ const SmallVectorImpl<SDValue> &OpValues);
void visitVPStore(const VPIntrinsic &VPIntrin,
const SmallVectorImpl<SDValue> &OpValues);
void visitVPGather(const VPIntrinsic &VPIntrin, EVT VT,
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 48d6b99..e235d14 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -775,13 +775,6 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
break;
}
- case ISD::FREEZE: {
- SDValue N0 = Op.getOperand(0);
- if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
- /*PoisonOnly=*/false, Depth + 1))
- return N0;
- break;
- }
case ISD::AND: {
LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
@@ -5125,6 +5118,20 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
}
+ // fold (setcc (trunc x) c) -> (setcc x c)
+ if (N0.getOpcode() == ISD::TRUNCATE &&
+ ((N0->getFlags().hasNoUnsignedWrap() && !ISD::isSignedIntSetCC(Cond)) ||
+ (N0->getFlags().hasNoSignedWrap() &&
+ !ISD::isUnsignedIntSetCC(Cond))) &&
+ isTypeDesirableForOp(ISD::SETCC, N0.getOperand(0).getValueType())) {
+ EVT NewVT = N0.getOperand(0).getValueType();
+ SDValue NewConst = DAG.getConstant(ISD::isSignedIntSetCC(Cond)
+ ? C1.sext(NewVT.getSizeInBits())
+ : C1.zext(NewVT.getSizeInBits()),
+ dl, NewVT);
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), NewConst, Cond);
+ }
+
if (SDValue V =
optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
return V;
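A worked instance of the new trunc fold, checked in plain C++; the nuw/nsw preconditions are modeled by choosing values that fit the narrow type:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // (setcc (trunc nuw x), 7, ult) -> (setcc x, zext(7), ult): a nuw
  // truncate promises the dropped high bits were zero, so the unsigned
  // compare is unchanged at the wider width.
  uint64_t X = 5; // fits in 32 bits, so the truncate would be nuw
  uint32_t T = static_cast<uint32_t>(X);
  assert((T < 7u) == (X < 7ull));

  // For signed predicates the same holds with nsw and sext(C1).
  int64_t Y = -3; // fits in 32 bits, so the truncate would be nsw
  int32_t S = static_cast<int32_t>(Y);
  assert((S < 7) == (Y < 7ll));
}
```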
@@ -5363,10 +5370,25 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (AndRHSC.isNegatedPowerOf2() && C1.isSubsetOf(AndRHSC)) {
unsigned ShiftBits = AndRHSC.countr_zero();
if (!shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
+ // If using an unsigned shift doesn't yield a legal compare
+ // immediate, try using sra instead.
+ APInt NewC = C1.lshr(ShiftBits);
+ if (NewC.getSignificantBits() <= 64 &&
+ !isLegalICmpImmediate(NewC.getSExtValue())) {
+ APInt SignedC = C1.ashr(ShiftBits);
+ if (SignedC.getSignificantBits() <= 64 &&
+ isLegalICmpImmediate(SignedC.getSExtValue())) {
+ SDValue Shift = DAG.getNode(
+ ISD::SRA, dl, ShValTy, N0.getOperand(0),
+ DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
+ SDValue CmpRHS = DAG.getConstant(SignedC, dl, ShValTy);
+ return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
+ }
+ }
SDValue Shift = DAG.getNode(
ISD::SRL, dl, ShValTy, N0.getOperand(0),
DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
- SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
+ SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
}
}
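The immediate-legality motivation can be checked directly; a small standalone C++ equivalence test for the mask 0xFFFFF000 (ShiftBits = 12):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // (x & 0xFFFFF000) == 0xFFFFF000 tests the top 20 bits. Shifting right
  // by 12 gives two equivalent forms:
  //   srl: (x >>u 12) == 0xFFFFF   (large unsigned immediate)
  //   sra: (x >>s 12) == -1        (small signed immediate)
  // Targets where -1 is a legal compare immediate but 0xFFFFF is not now
  // pick the sra form.
  for (uint32_t X : {0u, 0xFFFFF000u, 0xFFFFFFFFu, 0x7FFFF123u}) {
    bool ViaMask = (X & 0xFFFFF000u) == 0xFFFFF000u;
    bool ViaSrl = (X >> 12) == 0xFFFFFu;
    bool ViaSra = (static_cast<int32_t>(X) >> 12) == -1; // arithmetic shift
    assert(ViaMask == ViaSrl && ViaSrl == ViaSra);
  }
}
```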
@@ -5646,6 +5668,17 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
return N0;
}
+ // Fold (setcc (trunc x) (trunc y)) -> (setcc x y)
+ if (N0.getOpcode() == ISD::TRUNCATE && N1.getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
+ ((!ISD::isSignedIntSetCC(Cond) && N0->getFlags().hasNoUnsignedWrap() &&
+ N1->getFlags().hasNoUnsignedWrap()) ||
+ (!ISD::isUnsignedIntSetCC(Cond) && N0->getFlags().hasNoSignedWrap() &&
+ N1->getFlags().hasNoSignedWrap())) &&
+ isTypeDesirableForOp(ISD::SETCC, N0.getOperand(0).getValueType())) {
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
+ }
+
// Could not fold it.
return SDValue();
}
@@ -6482,8 +6515,8 @@ SDValue TargetLowering::buildSDIVPow2WithCMov(
Created.push_back(CMov.getNode());
// Divide by pow2.
- SDValue SRA =
- DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, VT));
+ SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, CMov,
+ DAG.getShiftAmountConstant(Lg2, VT, DL));
// If we're dividing by a positive value, we're done. Otherwise, we must
// negate the result.
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 705e046e..9e49ddd 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -993,7 +993,7 @@ static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
MI = MRI.getUniqueVRegDef(MO.getReg());
// And it needs to be in the trace (otherwise, it won't have a depth).
if (!MI || MI->getParent() != &MBB ||
- ((unsigned)MI->getOpcode() != CombineOpc && CombineOpc != 0))
+ (MI->getOpcode() != CombineOpc && CombineOpc != 0))
return false;
// Must only be used by the user we combine with.
if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 3c91b0e..9f525ea 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -697,7 +697,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm)
MaxGluedStoresPerMemcpy = 0;
MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize =
MaxStoresPerMemmoveOptSize = MaxLoadsPerMemcmpOptSize = 4;
- HasMultipleConditionRegisters = false;
HasExtractBitsInsn = false;
JumpIsExpensive = JumpIsExpensiveOverride;
PredictableSelectIsExpensive = false;
@@ -905,6 +904,8 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::GET_FPENV, VT, Expand);
setOperationAction(ISD::SET_FPENV, VT, Expand);
setOperationAction(ISD::RESET_FPENV, VT, Expand);
+
+ setOperationAction(ISD::MSTORE, VT, Expand);
}
// Most targets ignore the @llvm.prefetch intrinsic.
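Since initActions() now defaults ISD::MSTORE to Expand for every type, the foldToMaskedStore combine above only fires for targets that opt in. A hypothetical opt-in fragment, belonging inside a target's TargetLowering constructor (the type list is illustrative):

```cpp
// Inside SomeTargetLowering's constructor (hypothetical target):
for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
  setOperationAction(ISD::MSTORE, VT, Legal);
```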
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index e9172f4..d19ef92 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -402,8 +402,8 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(
const MachineModuleInfo *MMI) const {
SmallString<64> NameData("DW.ref.");
NameData += Sym->getName();
- MCSymbolELF *Label =
- cast<MCSymbolELF>(getContext().getOrCreateSymbol(NameData));
+ auto *Label =
+ static_cast<MCSymbolELF *>(getContext().getOrCreateSymbol(NameData));
Streamer.emitSymbolAttribute(Label, MCSA_Hidden);
Streamer.emitSymbolAttribute(Label, MCSA_Weak);
unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_GROUP;
@@ -581,7 +581,8 @@ static const MCSymbolELF *getLinkedToSymbol(const GlobalObject *GO,
auto *VM = cast<ValueAsMetadata>(MD->getOperand(0).get());
auto *OtherGV = dyn_cast<GlobalValue>(VM->getValue());
- return OtherGV ? dyn_cast<MCSymbolELF>(TM.getSymbol(OtherGV)) : nullptr;
+ return OtherGV ? static_cast<const MCSymbolELF *>(TM.getSymbol(OtherGV))
+ : nullptr;
}
static unsigned getEntrySizeForKind(SectionKind Kind) {
@@ -1011,7 +1012,7 @@ MCSection *TargetLoweringObjectFileELF::getSectionForLSDA(
(getContext().getAsmInfo()->useIntegratedAssembler() &&
getContext().getAsmInfo()->binutilsIsAtLeast(2, 36))) {
Flags |= ELF::SHF_LINK_ORDER;
- LinkedToSym = cast<MCSymbolELF>(&FnSym);
+ LinkedToSym = static_cast<const MCSymbolELF *>(&FnSym);
}
// Append the function name as the suffix like GCC, assuming
@@ -2370,9 +2371,10 @@ bool TargetLoweringObjectFileXCOFF::ShouldSetSSPCanaryBitInTB(
MCSymbol *
TargetLoweringObjectFileXCOFF::getEHInfoTableSymbol(const MachineFunction *MF) {
- MCSymbol *EHInfoSym = MF->getContext().getOrCreateSymbol(
- "__ehinfo." + Twine(MF->getFunctionNumber()));
- cast<MCSymbolXCOFF>(EHInfoSym)->setEHInfo();
+ auto *EHInfoSym =
+ static_cast<MCSymbolXCOFF *>(MF->getContext().getOrCreateSymbol(
+ "__ehinfo." + Twine(MF->getFunctionNumber())));
+ EHInfoSym->setEHInfo();
return EHInfoSym;
}
@@ -2510,7 +2512,8 @@ MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal(
if (Kind.isText()) {
if (TM.getFunctionSections()) {
- return cast<MCSymbolXCOFF>(getFunctionEntryPointSymbol(GO, TM))
+ return static_cast<const MCSymbolXCOFF *>(
+ getFunctionEntryPointSymbol(GO, TM))
->getRepresentedCsect();
}
return TextSection;
@@ -2713,7 +2716,7 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForTOCEntry(
const MCSymbol *Sym, const TargetMachine &TM) const {
const XCOFF::StorageMappingClass SMC = [](const MCSymbol *Sym,
const TargetMachine &TM) {
- const MCSymbolXCOFF *XSym = cast<MCSymbolXCOFF>(Sym);
+ auto *XSym = static_cast<const MCSymbolXCOFF *>(Sym);
// The "_$TLSML" symbol for TLS local-dynamic mode requires XMC_TC,
// otherwise the AIX assembler will complain.
@@ -2737,8 +2740,8 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForTOCEntry(
}(Sym, TM);
return getContext().getXCOFFSection(
- cast<MCSymbolXCOFF>(Sym)->getSymbolTableName(), SectionKind::getData(),
- XCOFF::CsectProperties(SMC, XCOFF::XTY_SD));
+ static_cast<const MCSymbolXCOFF *>(Sym)->getSymbolTableName(),
+ SectionKind::getData(), XCOFF::CsectProperties(SMC, XCOFF::XTY_SD));
}
MCSection *TargetLoweringObjectFileXCOFF::getSectionForLSDA(