Diffstat (limited to 'llvm/lib/CodeGen')
21 files changed, 394 insertions, 255 deletions
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 1641c3e..c72b6e8 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -3194,7 +3194,7 @@ void AsmPrinter::emitJumpTableSizesSection(const MachineJumpTableInfo &MJTI,
     return;

   if (isElf) {
-    MCSymbolELF *LinkedToSym = dyn_cast<MCSymbolELF>(CurrentFnSym);
+    auto *LinkedToSym = static_cast<MCSymbolELF *>(CurrentFnSym);
     int Flags = F.hasComdat() ? static_cast<int>(ELF::SHF_GROUP) : 0;

     JumpTableSizesSection = OutContext.getELFSection(
@@ -4702,7 +4702,7 @@ void AsmPrinter::emitXRayTable() {
   const Triple &TT = TM.getTargetTriple();
   // Use PC-relative addresses on all targets.
   if (TT.isOSBinFormatELF()) {
-    auto LinkedToSym = cast<MCSymbolELF>(CurrentFnSym);
+    auto LinkedToSym = static_cast<const MCSymbolELF *>(CurrentFnSym);
     auto Flags = ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER;
     StringRef GroupName;
     if (F.hasComdat()) {
@@ -4825,7 +4825,7 @@ void AsmPrinter::emitPatchableFunctionEntries() {
       Flags |= ELF::SHF_GROUP;
       GroupName = F.getComdat()->getName();
     }
-    LinkedToSym = cast<MCSymbolELF>(CurrentFnSym);
+    LinkedToSym = static_cast<const MCSymbolELF *>(CurrentFnSym);
   }
   OutStreamer->switchSection(OutContext.getELFSection(
       SectionName, ELF::SHT_PROGBITS, Flags, 0, GroupName, F.hasComdat(),
diff --git a/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
index 08ed78e..a7491a2 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -230,7 +230,7 @@ void DIEHash::hashBlockData(const DIE::const_value_range &Values) {
              "Base types referenced from DW_OP_convert should have a name");
       hashNestedType(C, Name);
     } else
-      Hash.update((uint64_t)V.getDIEInteger().getValue());
+      Hash.update(V.getDIEInteger().getValue());
 }

 // Hash the contents of a loclistptr class.
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 5577a7d..f9d7e76 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -508,7 +508,8 @@ void DwarfCompileUnit::addWasmRelocBaseGlobal(DIELoc *Loc, StringRef GlobalName,
   // don't want to depend on target specific headers in this code?
   const unsigned TI_GLOBAL_RELOC = 3;
   unsigned PointerSize = Asm->getDataLayout().getPointerSize();
-  auto *Sym = cast<MCSymbolWasm>(Asm->GetExternalSymbolSymbol(GlobalName));
+  auto *Sym =
+      static_cast<MCSymbolWasm *>(Asm->GetExternalSymbolSymbol(GlobalName));
   // FIXME: this repeats what WebAssemblyMCInstLower::
   // GetExternalSymbolSymbol does, since if there's no code that
   // refers to this symbol, we have to set it here.
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 7188833..5ae2d2a 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -940,14 +940,23 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
       // In the case of an indirect call find the register that holds
       // the callee.
       const MachineOperand &CalleeOp = TII->getCalleeOperand(MI);
-      if (!CalleeOp.isGlobal() &&
-          (!CalleeOp.isReg() || !CalleeOp.getReg().isPhysical()))
+      bool PhysRegCalleeOperand =
+          CalleeOp.isReg() && CalleeOp.getReg().isPhysical();
+      // Hack: WebAssembly CALL instructions have MCInstrDesc that does not
+      // describe the call target operand.
+      if (CalleeOp.getOperandNo() < MI.getDesc().operands().size()) {
+        const MCOperandInfo &MCOI =
+            MI.getDesc().operands()[CalleeOp.getOperandNo()];
+        PhysRegCalleeOperand =
+            PhysRegCalleeOperand && MCOI.OperandType == MCOI::OPERAND_REGISTER;
+      }
+      if (!CalleeOp.isGlobal() && !PhysRegCalleeOperand)
         continue;

       unsigned CallReg = 0;
       const DISubprogram *CalleeSP = nullptr;
       const Function *CalleeDecl = nullptr;
-      if (CalleeOp.isReg()) {
+      if (PhysRegCalleeOperand) {
         CallReg = CalleeOp.getReg();
         if (!CallReg)
           continue;
diff --git a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
index ff265b5..260ce8f 100644
--- a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
+++ b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
@@ -1403,7 +1403,7 @@ void AssignmentTrackingLowering::addMemDef(BlockInfo *LiveSet, VariableID Var,
                                            const Assignment &AV) {
   LiveSet->setAssignment(BlockInfo::Stack, Var, AV);

-  // Use this assigment for all fragments contained within Var, but do not
+  // Use this assignment for all fragments contained within Var, but do not
   // provide a Source because we cannot convert Var's value to a value for the
   // fragment.
   Assignment FragAV = AV;
@@ -1416,7 +1416,7 @@ void AssignmentTrackingLowering::addDbgDef(BlockInfo *LiveSet, VariableID Var,
                                            const Assignment &AV) {
   LiveSet->setAssignment(BlockInfo::Debug, Var, AV);

-  // Use this assigment for all fragments contained within Var, but do not
+  // Use this assignment for all fragments contained within Var, but do not
   // provide a Source because we cannot convert Var's value to a value for the
   // fragment.
   Assignment FragAV = AV;
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index f16283b..9223739 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -1834,7 +1834,7 @@ bool CodeGenPrepare::unfoldPowerOf2Test(CmpInst *Cmp) {
 ///
 /// Return true if any changes are made.
 static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
-  if (TLI.hasMultipleConditionRegisters())
+  if (TLI.hasMultipleConditionRegisters(EVT::getEVT(Cmp->getType())))
     return false;

   // Avoid sinking soft-FP comparisons, since this can move them into a loop.
diff --git a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
index 0f2c580..59c62cf 100644
--- a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
@@ -95,7 +95,7 @@ void CSEMIRBuilder::profileSrcOp(const SrcOp &Op,
                                  GISelInstProfileBuilder &B) const {
   switch (Op.getSrcOpKind()) {
   case SrcOp::SrcType::Ty_Imm:
-    B.addNodeIDImmediate(static_cast<int64_t>(Op.getImm()));
+    B.addNodeIDImmediate(Op.getImm());
     break;
   case SrcOp::SrcType::Ty_Predicate:
     B.addNodeIDImmediate(static_cast<int64_t>(Op.getPredicate()));
     break;
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index fd38c30..bbfae57 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1592,9 +1592,19 @@ bool IRTranslator::translateGetElementPtr(const User &U,
   Type *OffsetIRTy = DL->getIndexType(PtrIRTy);
   LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);

-  uint32_t Flags = 0;
+  uint32_t PtrAddFlags = 0;
+  // Each PtrAdd generated to implement the GEP inherits its nuw, nusw, inbounds
+  // flags.
if (const Instruction *I = dyn_cast<Instruction>(&U)) - Flags = MachineInstr::copyFlagsFromInstruction(*I); + PtrAddFlags = MachineInstr::copyFlagsFromInstruction(*I); + + auto PtrAddFlagsWithConst = [&](int64_t Offset) { + // For nusw/inbounds GEP with an offset that is nonnegative when interpreted + // as signed, assume there is no unsigned overflow. + if (Offset >= 0 && (PtrAddFlags & MachineInstr::MIFlag::NoUSWrap)) + return PtrAddFlags | MachineInstr::MIFlag::NoUWrap; + return PtrAddFlags; + }; // Normalize Vector GEP - all scalar operands should be converted to the // splat vector. @@ -1644,7 +1654,9 @@ bool IRTranslator::translateGetElementPtr(const User &U, if (Offset != 0) { auto OffsetMIB = MIRBuilder.buildConstant({OffsetTy}, Offset); - BaseReg = MIRBuilder.buildPtrAdd(PtrTy, BaseReg, OffsetMIB.getReg(0)) + BaseReg = MIRBuilder + .buildPtrAdd(PtrTy, BaseReg, OffsetMIB.getReg(0), + PtrAddFlagsWithConst(Offset)) .getReg(0); Offset = 0; } @@ -1668,12 +1680,23 @@ bool IRTranslator::translateGetElementPtr(const User &U, if (ElementSize != 1) { auto ElementSizeMIB = MIRBuilder.buildConstant( getLLTForType(*OffsetIRTy, *DL), ElementSize); + + // The multiplication is NUW if the GEP is NUW and NSW if the GEP is + // NUSW. + uint32_t ScaleFlags = PtrAddFlags & MachineInstr::MIFlag::NoUWrap; + if (PtrAddFlags & MachineInstr::MIFlag::NoUSWrap) + ScaleFlags |= MachineInstr::MIFlag::NoSWrap; + GepOffsetReg = - MIRBuilder.buildMul(OffsetTy, IdxReg, ElementSizeMIB).getReg(0); - } else + MIRBuilder.buildMul(OffsetTy, IdxReg, ElementSizeMIB, ScaleFlags) + .getReg(0); + } else { GepOffsetReg = IdxReg; + } - BaseReg = MIRBuilder.buildPtrAdd(PtrTy, BaseReg, GepOffsetReg).getReg(0); + BaseReg = + MIRBuilder.buildPtrAdd(PtrTy, BaseReg, GepOffsetReg, PtrAddFlags) + .getReg(0); } } @@ -1681,11 +1704,8 @@ bool IRTranslator::translateGetElementPtr(const User &U, auto OffsetMIB = MIRBuilder.buildConstant(OffsetTy, Offset); - if (Offset >= 0 && cast<GEPOperator>(U).isInBounds()) - Flags |= MachineInstr::MIFlag::NoUWrap; - MIRBuilder.buildPtrAdd(getOrCreateVReg(U), BaseReg, OffsetMIB.getReg(0), - Flags); + PtrAddFlagsWithConst(Offset)); return true; } @@ -2189,8 +2209,8 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, unsigned Op = ID == Intrinsic::lifetime_start ? 
TargetOpcode::LIFETIME_START : TargetOpcode::LIFETIME_END; - const AllocaInst *AI = cast<AllocaInst>(CI.getArgOperand(1)); - if (!AI->isStaticAlloca()) + const AllocaInst *AI = dyn_cast<AllocaInst>(CI.getArgOperand(1)); + if (!AI || !AI->isStaticAlloca()) return true; MIRBuilder.buildInstr(Op).addFrameIndex(getOrCreateFrameIndex(*AI)); diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index f48bfc0..8955dd0 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -1401,6 +1401,21 @@ bool llvm::isBuildVectorConstantSplat(const Register Reg, return false; } +bool llvm::isBuildVectorConstantSplat(const Register Reg, + const MachineRegisterInfo &MRI, + APInt SplatValue, bool AllowUndef) { + if (auto SplatValAndReg = getAnyConstantSplat(Reg, MRI, AllowUndef)) { + if (SplatValAndReg->Value.getBitWidth() < SplatValue.getBitWidth()) + return APInt::isSameValue( + SplatValAndReg->Value.sext(SplatValue.getBitWidth()), SplatValue); + return APInt::isSameValue( + SplatValAndReg->Value, + SplatValue.sext(SplatValAndReg->Value.getBitWidth())); + } + + return false; +} + bool llvm::isBuildVectorConstantSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, int64_t SplatValue, bool AllowUndef) { @@ -1408,6 +1423,13 @@ bool llvm::isBuildVectorConstantSplat(const MachineInstr &MI, AllowUndef); } +bool llvm::isBuildVectorConstantSplat(const MachineInstr &MI, + const MachineRegisterInfo &MRI, + APInt SplatValue, bool AllowUndef) { + return isBuildVectorConstantSplat(MI.getOperand(0).getReg(), MRI, SplatValue, + AllowUndef); +} + std::optional<APInt> llvm::getIConstantSplatVal(const Register Reg, const MachineRegisterInfo &MRI) { if (auto SplatValAndReg = diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index 7ede564..514f2f0 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -306,12 +306,7 @@ class RegisterCoalescer : private LiveRangeEdit::Delegate { /// number if it is not zero. If DstReg is a physical register and the /// existing subregister number of the def / use being updated is not zero, /// make sure to set it to the correct physical subregister. - /// - /// If \p SubregToRegSrcInst is not empty, we are coalescing a - /// `DstReg = SUBREG_TO_REG SrcReg`, which should introduce an - /// implicit-def of DstReg on instructions that define SrcReg. - void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx, - ArrayRef<MachineInstr *> SubregToRegSrcInst = {}); + void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx); /// If the given machine operand reads only undefined lanes add an undef /// flag. @@ -1448,7 +1443,6 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, // CopyMI may have implicit operands, save them so that we can transfer them // over to the newly materialized instruction after CopyMI is removed. 
- LaneBitmask NewMIImplicitOpsMask; SmallVector<MachineOperand, 4> ImplicitOps; ImplicitOps.reserve(CopyMI->getNumOperands() - CopyMI->getDesc().getNumOperands()); @@ -1463,9 +1457,6 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, (MO.getSubReg() == 0 && MO.getReg() == DstOperand.getReg())) && "unexpected implicit virtual register def"); ImplicitOps.push_back(MO); - if (MO.isDef() && MO.getReg().isVirtual() && - MRI->shouldTrackSubRegLiveness(DstReg)) - NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg()); } } @@ -1508,11 +1499,14 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, } else { assert(MO.getReg() == NewMI.getOperand(0).getReg()); - // If lanemasks need to be tracked, compile the lanemask of the NewMI - // implicit def operands to avoid subranges for the super-regs from - // being removed by code later on in this function. - if (MRI->shouldTrackSubRegLiveness(MO.getReg())) - NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg()); + // We're only expecting another def of the main output, so the range + // should get updated with the regular output range. + // + // FIXME: The range updating below probably needs updating to look at + // the super register if subranges are tracked. + assert(!MRI->shouldTrackSubRegLiveness(DstReg) && + "subrange update for implicit-def of super register may not be " + "properly handled"); } } } @@ -1612,8 +1606,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber()); VNInfo::Allocator &Alloc = LIS->getVNInfoAllocator(); for (LiveInterval::SubRange &SR : DstInt.subranges()) { - if ((SR.LaneMask & DstMask).none() && - (SR.LaneMask & NewMIImplicitOpsMask).none()) { + if ((SR.LaneMask & DstMask).none()) { LLVM_DEBUG(dbgs() << "Removing undefined SubRange " << PrintLaneMask(SR.LaneMask) << " : " << SR << "\n"); @@ -1631,11 +1624,11 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, UpdatedSubRanges = true; } else { // We know that this lane is defined by this instruction, - // but at this point it may be empty because it is not used by - // anything. This happens when updateRegDefUses adds the missing - // lanes. Assign that lane a dead def so that the interferences - // are properly modeled. - if (SR.empty()) + // but at this point it might not be live because it was not defined + // by the original instruction. This happens when the + // rematerialization widens the defined register. Assign that lane a + // dead def so that the interferences are properly modeled. + if (!SR.liveAt(DefIndex)) SR.createDeadDef(DefIndex, Alloc); } } @@ -1877,14 +1870,11 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx, } } -void RegisterCoalescer::updateRegDefsUses( - Register SrcReg, Register DstReg, unsigned SubIdx, - ArrayRef<MachineInstr *> SubregToRegSrcInsts) { +void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, + unsigned SubIdx) { bool DstIsPhys = DstReg.isPhysical(); LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg); - // Coalescing a COPY may expose reads of 'undef' subregisters. - // If so, then explicitly propagate 'undef' to those operands. 
if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) { for (MachineOperand &MO : MRI->reg_operands(DstReg)) { if (MO.isUndef()) @@ -1901,15 +1891,6 @@ void RegisterCoalescer::updateRegDefsUses( } } - // If DstInt already has a subrange for the unused lanes, then we shouldn't - // create duplicate subranges when we update the interval for unused lanes. - LaneBitmask DstIntLaneMask; - if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) { - for (LiveInterval::SubRange &SR : DstInt->subranges()) - DstIntLaneMask |= SR.LaneMask; - } - - // Go through all instructions to replace uses of 'SrcReg' by 'DstReg'. SmallPtrSet<MachineInstr *, 8> Visited; for (MachineRegisterInfo::reg_instr_iterator I = MRI->reg_instr_begin(SrcReg), E = MRI->reg_instr_end(); @@ -1933,80 +1914,6 @@ void RegisterCoalescer::updateRegDefsUses( if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr()) Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI)); - bool RequiresImplicitRedef = false; - if (!SubregToRegSrcInsts.empty()) { - // We can only add an implicit-def and undef if the sub registers match, - // e.g. - // %0:gr32 = INSTX - // %0.sub8:gr32 = INSTY // top 24 bits of %0 still defined - // %1:gr64 = SUBREG_TO_REG 0, %0, %subreg.sub32 - // - // This cannot be transformed into: - // %1.sub32:gr64 = INSTX - // undef %1.sub8:gr64 = INSTY , implicit-def %1 - // - // Because that would thrash the top 24 bits of %1.sub32. - if (is_contained(SubregToRegSrcInsts, UseMI) && - all_of(UseMI->defs(), - [&SubIdx, &SrcReg](const MachineOperand &MO) -> bool { - if (MO.getReg() != SrcReg || !MO.getSubReg() || MO.isUndef()) - return true; - return SubIdx == MO.getSubReg(); - })) { - // Add implicit-def of super-register to express that the whole - // register is defined by the instruction. - MachineInstrBuilder MIB(*MF, UseMI); - MIB.addReg(DstReg, RegState::ImplicitDefine); - RequiresImplicitRedef = true; - } - - // If the coalesed instruction doesn't fully define the register, we need - // to preserve the original super register liveness for SUBREG_TO_REG. - // - // We pretended SUBREG_TO_REG was a regular copy for coalescing purposes, - // but it introduces liveness for other subregisters. Downstream users may - // have been relying on those bits, so we need to ensure their liveness is - // captured with a def of other lanes. - if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) { - // First check if there is sufficient granularity in terms of subranges. - LaneBitmask DstMask = MRI->getMaxLaneMaskForVReg(DstInt->reg()); - LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx); - LaneBitmask UnusedLanes = DstMask & ~UsedLanes; - if ((UnusedLanes & ~DstIntLaneMask).any()) { - BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); - DstInt->createSubRangeFrom(Allocator, UnusedLanes, *DstInt); - DstIntLaneMask |= UnusedLanes; - } - - // After duplicating the live ranges for the low/hi bits, we - // need to update the subranges of the DstReg interval such that - // for a case like this: - // - // entry: - // 16B %1:gpr32 = INSTRUCTION (<=> UseMI) - // : - // if.then: - // 32B %1:gpr32 = MOVIMM32 .. - // 48B %0:gpr64 = SUBREG_TO_REG 0, %1, sub32 - // - // Only the MOVIMM32 require a def of the top lanes and any intervals - // for the top 32-bits of the def at 16B should be removed. - for (LiveInterval::SubRange &SR : DstInt->subranges()) { - if (!Writes || RequiresImplicitRedef || - (SR.LaneMask & UnusedLanes).none()) - continue; - - assert((SR.LaneMask & UnusedLanes) == SR.LaneMask && - "Unexpected lanemask. 
Subrange needs finer granularity"); - - SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI).getRegSlot(false); - auto SegmentI = SR.find(UseIdx); - if (SegmentI != SR.end()) - SR.removeSegment(SegmentI, true); - } - } - } - // Replace SrcReg with DstReg in all UseMI operands. for (unsigned Op : Ops) { MachineOperand &MO = UseMI->getOperand(Op); @@ -2015,7 +1922,7 @@ void RegisterCoalescer::updateRegDefsUses( // turn a full def into a read-modify-write sub-register def and vice // versa. if (SubIdx && MO.isDef()) - MO.setIsUndef(!Reads || RequiresImplicitRedef); + MO.setIsUndef(!Reads); // A subreg use of a partially undef (super) register may be a complete // undef use now and then has to be marked that way. @@ -2118,30 +2025,6 @@ void RegisterCoalescer::setUndefOnPrunedSubRegUses(LiveInterval &LI, LIS->shrinkToUses(&LI); } -/// For a given use of value \p Idx, it returns the def in the current block, -/// or otherwise all possible defs in preceding blocks. -static bool FindDefInBlock(SmallPtrSetImpl<MachineBasicBlock *> &VisitedBlocks, - SmallVector<MachineInstr *> &Instrs, - LiveIntervals *LIS, LiveInterval &SrcInt, - MachineBasicBlock *MBB, VNInfo *Idx) { - if (!Idx->isPHIDef()) { - MachineInstr *Def = LIS->getInstructionFromIndex(Idx->def); - assert(Def && "Unable to find a def for SUBREG_TO_REG source operand"); - Instrs.push_back(Def); - return true; - } - - bool Any = false; - if (VisitedBlocks.count(MBB)) - return false; - VisitedBlocks.insert(MBB); - for (MachineBasicBlock *Pred : MBB->predecessors()) { - Any |= FindDefInBlock(VisitedBlocks, Instrs, LIS, SrcInt, Pred, - SrcInt.getVNInfoBefore(LIS->getMBBEndIdx(Pred))); - } - return Any; -} - bool RegisterCoalescer::joinCopy( MachineInstr *CopyMI, bool &Again, SmallPtrSetImpl<MachineInstr *> &CurrentErasedInstrs) { @@ -2273,35 +2156,6 @@ bool RegisterCoalescer::joinCopy( }); } - SmallVector<MachineInstr *> SubregToRegSrcInsts; - if (CopyMI->isSubregToReg()) { - // For the case where the copy instruction is a SUBREG_TO_REG, e.g. - // - // %0:gpr32 = movimm32 .. - // %1:gpr64 = SUBREG_TO_REG 0, %0, sub32 - // ... - // %0:gpr32 = COPY <something> - // - // After joining liveranges, the original `movimm32` will need an - // implicit-def to make it explicit that the entire register is written, - // i.e. - // - // undef %0.sub32:gpr64 = movimm32 ..., implicit-def %0 - // ... - // undef %0.sub32:gpr64 = COPY <something> // Note that this does not - // // require an implicit-def, - // // because it has nothing to - // // do with the SUBREG_TO_REG. - LiveInterval &SrcInt = - LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); - SlotIndex SubregToRegSlotIdx = LIS->getInstructionIndex(*CopyMI); - SmallPtrSet<MachineBasicBlock *, 8> VisitedBlocks; - if (!FindDefInBlock(VisitedBlocks, SubregToRegSrcInsts, LIS, SrcInt, - CopyMI->getParent(), - SrcInt.Query(SubregToRegSlotIdx).valueIn())) - llvm_unreachable("SUBREG_TO_REG src requires a def"); - } - ShrinkMask = LaneBitmask::getNone(); ShrinkMainRange = false; @@ -2371,12 +2225,9 @@ bool RegisterCoalescer::joinCopy( // Rewrite all SrcReg operands to DstReg. // Also update DstReg operands to include DstIdx if it is set. 
- if (CP.getDstIdx()) { - assert(SubregToRegSrcInsts.empty() && "can this happen?"); + if (CP.getDstIdx()) updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx()); - } - updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx(), - SubregToRegSrcInsts); + updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx()); // Shrink subregister ranges if necessary. if (ShrinkMask.any()) { diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 11e869a..d70e96938 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4075,18 +4075,11 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { unsigned BitWidth = VT.getScalarSizeInBits(); SDLoc DL(N); - auto PeekThroughFreeze = [](SDValue N) { - if (N->getOpcode() == ISD::FREEZE && N.hasOneUse()) - return N->getOperand(0); - return N; - }; - if (SDValue V = foldSubCtlzNot<EmptyMatchContext>(N, DAG)) return V; // fold (sub x, x) -> 0 - // FIXME: Refactor this and xor and other similar operations together. - if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1)) + if (N0 == N1) return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations); // fold (sub c1, c2) -> c3 @@ -6499,19 +6492,21 @@ static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2, // It is safe to use FMINNUM_IEEE/FMAXNUM_IEEE if all the operands // are non NaN values. if (((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::OR)) || - ((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND))) + ((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND))) { return arebothOperandsNotNan(Operand1, Operand2, DAG) && isFMAXNUMFMINNUM_IEEE ? ISD::FMINNUM_IEEE : ISD::DELETED_NODE; - else if (((CC == ISD::SETGT || CC == ISD::SETGE) && - (OrAndOpcode == ISD::OR)) || - ((CC == ISD::SETLT || CC == ISD::SETLE) && - (OrAndOpcode == ISD::AND))) + } + + if (((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::OR)) || + ((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::AND))) { return arebothOperandsNotNan(Operand1, Operand2, DAG) && isFMAXNUMFMINNUM_IEEE ? ISD::FMAXNUM_IEEE : ISD::DELETED_NODE; + } + // Both FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle quiet // NaNs in the same way. But, FMINNUM/FMAXNUM and FMINNUM_IEEE/ // FMAXNUM_IEEE handle signaling NaNs differently. If we cannot prove @@ -6521,24 +6516,24 @@ static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2, // we can prove that we do not have any sNaNs, then we can do the // optimization using FMINNUM_IEEE/FMAXNUM_IEEE for the following // cases. - else if (((CC == ISD::SETOLT || CC == ISD::SETOLE) && - (OrAndOpcode == ISD::OR)) || - ((CC == ISD::SETUGT || CC == ISD::SETUGE) && - (OrAndOpcode == ISD::AND))) + if (((CC == ISD::SETOLT || CC == ISD::SETOLE) && (OrAndOpcode == ISD::OR)) || + ((CC == ISD::SETUGT || CC == ISD::SETUGE) && (OrAndOpcode == ISD::AND))) { return isFMAXNUMFMINNUM ? ISD::FMINNUM - : arebothOperandsNotSNan(Operand1, Operand2, DAG) && - isFMAXNUMFMINNUM_IEEE - ? ISD::FMINNUM_IEEE - : ISD::DELETED_NODE; - else if (((CC == ISD::SETOGT || CC == ISD::SETOGE) && - (OrAndOpcode == ISD::OR)) || - ((CC == ISD::SETULT || CC == ISD::SETULE) && - (OrAndOpcode == ISD::AND))) + : arebothOperandsNotSNan(Operand1, Operand2, DAG) && + isFMAXNUMFMINNUM_IEEE + ? 
ISD::FMINNUM_IEEE + : ISD::DELETED_NODE; + } + + if (((CC == ISD::SETOGT || CC == ISD::SETOGE) && (OrAndOpcode == ISD::OR)) || + ((CC == ISD::SETULT || CC == ISD::SETULE) && (OrAndOpcode == ISD::AND))) { return isFMAXNUMFMINNUM ? ISD::FMAXNUM - : arebothOperandsNotSNan(Operand1, Operand2, DAG) && - isFMAXNUMFMINNUM_IEEE - ? ISD::FMAXNUM_IEEE - : ISD::DELETED_NODE; + : arebothOperandsNotSNan(Operand1, Operand2, DAG) && + isFMAXNUMFMINNUM_IEEE + ? ISD::FMAXNUM_IEEE + : ISD::DELETED_NODE; + } + return ISD::DELETED_NODE; } @@ -13184,14 +13179,14 @@ static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal, // select Cond, -1, x → or Cond, x if (IsTAllOne) { - SDValue X = DAG.getBitcast(CondVT, FVal); + SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(FVal)); SDValue Or = DAG.getNode(ISD::OR, DL, CondVT, Cond, X); return DAG.getBitcast(VT, Or); } // select Cond, x, 0 → and Cond, x if (IsFAllZero) { - SDValue X = DAG.getBitcast(CondVT, TVal); + SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(TVal)); SDValue And = DAG.getNode(ISD::AND, DL, CondVT, Cond, X); return DAG.getBitcast(VT, And); } @@ -13199,7 +13194,7 @@ static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal, // select Cond, 0, x -> and not(Cond), x if (IsTAllZero && (isBitwiseNot(peekThroughBitcasts(Cond)) || TLI.hasAndNot(Cond))) { - SDValue X = DAG.getBitcast(CondVT, FVal); + SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(FVal)); SDValue And = DAG.getNode(ISD::AND, DL, CondVT, DAG.getNOT(DL, Cond, CondVT), X); return DAG.getBitcast(VT, And); @@ -16754,6 +16749,17 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) { if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false)) return N0; + // If we have frozen and unfrozen users of N0, update so everything uses N. + if (!N0.isUndef() && !N0.hasOneUse()) { + SDValue FrozenN0(N, 0); + DAG.ReplaceAllUsesOfValueWith(N0, FrozenN0); + // ReplaceAllUsesOfValueWith will have also updated the use in N, thus + // creating a cycle in a DAG. Let's undo that by mutating the freeze. + assert(N->getOperand(0) == FrozenN0 && "Expected cycle in DAG"); + DAG.UpdateNodeOperands(N, N0); + return FrozenN0; + } + // We currently avoid folding freeze over SRA/SRL, due to the problems seen // with (freeze (assert ext)) blocking simplifications of SRA/SRL. See for // example https://reviews.llvm.org/D136529#4120959. 
@@ -16807,8 +16813,7 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) { SmallSet<SDValue, 8> MaybePoisonOperands; SmallVector<unsigned, 8> MaybePoisonOperandNumbers; for (auto [OpNo, Op] : enumerate(N0->ops())) { - if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false, - /*Depth*/ 1)) + if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly=*/false)) continue; bool HadMaybePoisonOperands = !MaybePoisonOperands.empty(); bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op).second; @@ -22534,6 +22539,56 @@ SDValue DAGCombiner::visitATOMIC_STORE(SDNode *N) { return SDValue(); } +static SDValue foldToMaskedStore(StoreSDNode *Store, SelectionDAG &DAG, + const SDLoc &Dl) { + if (!Store->isSimple() || !ISD::isNormalStore(Store)) + return SDValue(); + + SDValue StoredVal = Store->getValue(); + SDValue StorePtr = Store->getBasePtr(); + SDValue StoreOffset = Store->getOffset(); + EVT VT = Store->getMemoryVT(); + unsigned AddrSpace = Store->getAddressSpace(); + Align Alignment = Store->getAlign(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + if (!TLI.isOperationLegalOrCustom(ISD::MSTORE, VT) || + !TLI.allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment)) + return SDValue(); + + SDValue Mask, OtherVec, LoadCh; + unsigned LoadPos; + if (sd_match(StoredVal, + m_VSelect(m_Value(Mask), m_Value(OtherVec), + m_Load(m_Value(LoadCh), m_Specific(StorePtr), + m_Specific(StoreOffset))))) { + LoadPos = 2; + } else if (sd_match(StoredVal, + m_VSelect(m_Value(Mask), + m_Load(m_Value(LoadCh), m_Specific(StorePtr), + m_Specific(StoreOffset)), + m_Value(OtherVec)))) { + LoadPos = 1; + } else { + return SDValue(); + } + + auto *Load = cast<LoadSDNode>(StoredVal.getOperand(LoadPos)); + if (!Load->isSimple() || !ISD::isNormalLoad(Load) || + Load->getAddressSpace() != AddrSpace) + return SDValue(); + + if (!Store->getChain().reachesChainWithoutSideEffects(LoadCh)) + return SDValue(); + + if (LoadPos == 1) + Mask = DAG.getNOT(Dl, Mask, Mask.getValueType()); + + return DAG.getMaskedStore(Store->getChain(), Dl, OtherVec, StorePtr, + StoreOffset, Mask, VT, Store->getMemOperand(), + Store->getAddressingMode()); +} + SDValue DAGCombiner::visitSTORE(SDNode *N) { StoreSDNode *ST = cast<StoreSDNode>(N); SDValue Chain = ST->getChain(); @@ -22768,6 +22823,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (SDValue NewSt = splitMergedValStore(ST)) return NewSt; + if (SDValue MaskedStore = foldToMaskedStore(ST, DAG, SDLoc(N))) + return MaskedStore; + return ReduceLoadOpStoreWidth(N); } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 583a85a..a5bd97a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2217,8 +2217,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) { switch (getTypeAction(InVT)) { case TargetLowering::TypePromoteInteger: { - // TODO: Handle big endian - if (OutVT.isVector() && DAG.getDataLayout().isLittleEndian()) { + // TODO: Handle big endian & vector input type. 
+ if (OutVT.isVector() && !InVT.isVector() && + DAG.getDataLayout().isLittleEndian()) { EVT EltVT = OutVT.getVectorElementType(); TypeSize EltSize = EltVT.getSizeInBits(); TypeSize NInSize = NInVT.getSizeInBits(); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 2e13b18..63544e6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -971,6 +971,7 @@ private: void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi); + void SplitVecRes_VP_LOAD_FF(VPLoadFFSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, SDValue &Lo, SDValue &Hi); void SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi); @@ -1075,6 +1076,7 @@ private: SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); SDValue WidenVecRes_LOAD(SDNode* N); SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N); + SDValue WidenVecRes_VP_LOAD_FF(VPLoadFFSDNode *N); SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N); SDValue WidenVecRes_VECTOR_COMPRESS(SDNode *N); SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 1661814..bc2dbfb 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1152,6 +1152,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::VP_LOAD: SplitVecRes_VP_LOAD(cast<VPLoadSDNode>(N), Lo, Hi); break; + case ISD::VP_LOAD_FF: + SplitVecRes_VP_LOAD_FF(cast<VPLoadFFSDNode>(N), Lo, Hi); + break; case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: SplitVecRes_VP_STRIDED_LOAD(cast<VPStridedLoadSDNode>(N), Lo, Hi); break; @@ -2227,6 +2230,45 @@ void DAGTypeLegalizer::SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, ReplaceValueWith(SDValue(LD, 1), Ch); } +void DAGTypeLegalizer::SplitVecRes_VP_LOAD_FF(VPLoadFFSDNode *LD, SDValue &Lo, + SDValue &Hi) { + SDLoc dl(LD); + auto [LoVT, HiVT] = DAG.GetSplitDestVTs(LD->getValueType(0)); + + SDValue Ch = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + Align Alignment = LD->getBaseAlign(); + SDValue Mask = LD->getMask(); + SDValue EVL = LD->getVectorLength(); + + // Split Mask operand + SDValue MaskLo, MaskHi; + if (Mask.getOpcode() == ISD::SETCC) { + SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi); + } else { + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + } + + // Split EVL operand + auto [EVLLo, EVLHi] = DAG.SplitEVL(EVL, LD->getValueType(0), dl); + + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + LD->getPointerInfo(), MachineMemOperand::MOLoad, + LocationSize::beforeOrAfterPointer(), Alignment, LD->getAAInfo(), + LD->getRanges()); + + Lo = DAG.getLoadFFVP(LoVT, dl, Ch, Ptr, MaskLo, EVLLo, MMO); + + // Fill the upper half with poison. 
+ Hi = DAG.getUNDEF(HiVT); + + ReplaceValueWith(SDValue(LD, 1), Lo.getValue(1)); + ReplaceValueWith(SDValue(LD, 2), Lo.getValue(2)); +} + void DAGTypeLegalizer::SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, SDValue &Lo, SDValue &Hi) { assert(SLD->isUnindexed() && @@ -4707,6 +4749,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::VP_LOAD: Res = WidenVecRes_VP_LOAD(cast<VPLoadSDNode>(N)); break; + case ISD::VP_LOAD_FF: + Res = WidenVecRes_VP_LOAD_FF(cast<VPLoadFFSDNode>(N)); + break; case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: Res = WidenVecRes_VP_STRIDED_LOAD(cast<VPStridedLoadSDNode>(N)); break; @@ -6163,6 +6208,29 @@ SDValue DAGTypeLegalizer::WidenVecRes_VP_LOAD(VPLoadSDNode *N) { return Res; } +SDValue DAGTypeLegalizer::WidenVecRes_VP_LOAD_FF(VPLoadFFSDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Mask = N->getMask(); + SDValue EVL = N->getVectorLength(); + SDLoc dl(N); + + // The mask should be widened as well + assert(getTypeAction(Mask.getValueType()) == + TargetLowering::TypeWidenVector && + "Unable to widen binary VP op"); + Mask = GetWidenedVector(Mask); + assert(Mask.getValueType().getVectorElementCount() == + TLI.getTypeToTransformTo(*DAG.getContext(), Mask.getValueType()) + .getVectorElementCount() && + "Unable to widen vector load"); + + SDValue Res = DAG.getLoadFFVP(WidenVT, dl, N->getChain(), N->getBasePtr(), + Mask, EVL, N->getMemOperand()); + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + ReplaceValueWith(SDValue(N, 2), Res.getValue(2)); + return Res; +} + SDValue DAGTypeLegalizer::WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N) { SDLoc DL(N); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index f41b6eb..71a175d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -837,6 +837,14 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(ELD->getMemOperand()->getFlags()); break; } + case ISD::VP_LOAD_FF: { + const auto *LD = cast<VPLoadFFSDNode>(N); + ID.AddInteger(LD->getMemoryVT().getRawBits()); + ID.AddInteger(LD->getRawSubclassData()); + ID.AddInteger(LD->getPointerInfo().getAddrSpace()); + ID.AddInteger(LD->getMemOperand()->getFlags()); + break; + } case ISD::VP_STORE: { const VPStoreSDNode *EST = cast<VPStoreSDNode>(N); ID.AddInteger(EST->getMemoryVT().getRawBits()); @@ -6351,8 +6359,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, break; case ISD::FREEZE: assert(VT == N1.getValueType() && "Unexpected VT!"); - if (isGuaranteedNotToBeUndefOrPoison(N1, /*PoisonOnly*/ false, - /*Depth*/ 1)) + if (isGuaranteedNotToBeUndefOrPoison(N1, /*PoisonOnly=*/false)) return N1; break; case ISD::TokenFactor: @@ -10434,6 +10441,34 @@ SDValue SelectionDAG::getMaskedHistogram(SDVTList VTs, EVT MemVT, return V; } +SDValue SelectionDAG::getLoadFFVP(EVT VT, const SDLoc &DL, SDValue Chain, + SDValue Ptr, SDValue Mask, SDValue EVL, + MachineMemOperand *MMO) { + SDVTList VTs = getVTList(VT, EVL.getValueType(), MVT::Other); + SDValue Ops[] = {Chain, Ptr, Mask, EVL}; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::VP_LOAD_FF, VTs, Ops); + ID.AddInteger(VT.getRawBits()); + ID.AddInteger(getSyntheticNodeSubclassData<VPLoadFFSDNode>(DL.getIROrder(), + VTs, VT, MMO)); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + ID.AddInteger(MMO->getFlags()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, 
DL, IP)) { + cast<VPLoadFFSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + auto *N = newSDNode<VPLoadFFSDNode>(DL.getIROrder(), DL.getDebugLoc(), VTs, + VT, MMO); + createOperands(N, Ops); + + CSEMap.InsertNode(N, IP); + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + SDValue SelectionDAG::getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 306e068..d0815e9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -7598,7 +7598,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, if (TM.getOptLevel() == CodeGenOptLevel::None) return; - const AllocaInst *LifetimeObject = cast<AllocaInst>(I.getArgOperand(1)); + const AllocaInst *LifetimeObject = dyn_cast<AllocaInst>(I.getArgOperand(1)); + if (!LifetimeObject) + return; // First check that the Alloca is static, otherwise it won't have a // valid frame index. @@ -8440,6 +8442,34 @@ void SelectionDAGBuilder::visitVPLoad( setValue(&VPIntrin, LD); } +void SelectionDAGBuilder::visitVPLoadFF( + const VPIntrinsic &VPIntrin, EVT VT, EVT EVLVT, + const SmallVectorImpl<SDValue> &OpValues) { + assert(OpValues.size() == 3 && "Unexpected number of operands"); + SDLoc DL = getCurSDLoc(); + Value *PtrOperand = VPIntrin.getArgOperand(0); + MaybeAlign Alignment = VPIntrin.getPointerAlignment(); + AAMDNodes AAInfo = VPIntrin.getAAMetadata(); + const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range); + SDValue LD; + // Do not serialize variable-length loads of constant memory with + // anything. + if (!Alignment) + Alignment = DAG.getEVTAlign(VT); + MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo); + bool AddToChain = !BatchAA || !BatchAA->pointsToConstantMemory(ML); + SDValue InChain = AddToChain ? 
DAG.getRoot() : DAG.getEntryNode(); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad, + LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo, Ranges); + LD = DAG.getLoadFFVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2], + MMO); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, EVLVT, LD.getValue(1)); + if (AddToChain) + PendingLoads.push_back(LD.getValue(2)); + setValue(&VPIntrin, DAG.getMergeValues({LD.getValue(0), Trunc}, DL)); +} + void SelectionDAGBuilder::visitVPGather( const VPIntrinsic &VPIntrin, EVT VT, const SmallVectorImpl<SDValue> &OpValues) { @@ -8673,6 +8703,9 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic( case ISD::VP_LOAD: visitVPLoad(VPIntrin, ValueVTs[0], OpValues); break; + case ISD::VP_LOAD_FF: + visitVPLoadFF(VPIntrin, ValueVTs[0], ValueVTs[1], OpValues); + break; case ISD::VP_GATHER: visitVPGather(VPIntrin, ValueVTs[0], OpValues); break; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 1c27807..c251755 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -631,6 +631,8 @@ private: void visitVectorExtractLastActive(const CallInst &I, unsigned Intrinsic); void visitVPLoad(const VPIntrinsic &VPIntrin, EVT VT, const SmallVectorImpl<SDValue> &OpValues); + void visitVPLoadFF(const VPIntrinsic &VPIntrin, EVT VT, EVT EVLVT, + const SmallVectorImpl<SDValue> &OpValues); void visitVPStore(const VPIntrinsic &VPIntrin, const SmallVectorImpl<SDValue> &OpValues); void visitVPGather(const VPIntrinsic &VPIntrin, EVT VT, diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 48d6b99..e235d14 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -775,13 +775,6 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( break; } - case ISD::FREEZE: { - SDValue N0 = Op.getOperand(0); - if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts, - /*PoisonOnly=*/false, Depth + 1)) - return N0; - break; - } case ISD::AND: { LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); @@ -5125,6 +5118,20 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE); } + // fold (setcc (trunc x) c) -> (setcc x c) + if (N0.getOpcode() == ISD::TRUNCATE && + ((N0->getFlags().hasNoUnsignedWrap() && !ISD::isSignedIntSetCC(Cond)) || + (N0->getFlags().hasNoSignedWrap() && + !ISD::isUnsignedIntSetCC(Cond))) && + isTypeDesirableForOp(ISD::SETCC, N0.getOperand(0).getValueType())) { + EVT NewVT = N0.getOperand(0).getValueType(); + SDValue NewConst = DAG.getConstant(ISD::isSignedIntSetCC(Cond) + ? 
C1.sext(NewVT.getSizeInBits()) + : C1.zext(NewVT.getSizeInBits()), + dl, NewVT); + return DAG.getSetCC(dl, VT, N0.getOperand(0), NewConst, Cond); + } + if (SDValue V = optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl)) return V; @@ -5363,10 +5370,25 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (AndRHSC.isNegatedPowerOf2() && C1.isSubsetOf(AndRHSC)) { unsigned ShiftBits = AndRHSC.countr_zero(); if (!shouldAvoidTransformToShift(ShValTy, ShiftBits)) { + // If using an unsigned shift doesn't yield a legal compare + // immediate, try using sra instead. + APInt NewC = C1.lshr(ShiftBits); + if (NewC.getSignificantBits() <= 64 && + !isLegalICmpImmediate(NewC.getSExtValue())) { + APInt SignedC = C1.ashr(ShiftBits); + if (SignedC.getSignificantBits() <= 64 && + isLegalICmpImmediate(SignedC.getSExtValue())) { + SDValue Shift = DAG.getNode( + ISD::SRA, dl, ShValTy, N0.getOperand(0), + DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl)); + SDValue CmpRHS = DAG.getConstant(SignedC, dl, ShValTy); + return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond); + } + } SDValue Shift = DAG.getNode( ISD::SRL, dl, ShValTy, N0.getOperand(0), DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl)); - SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy); + SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy); return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond); } } @@ -5646,6 +5668,17 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return N0; } + // Fold (setcc (trunc x) (trunc y)) -> (setcc x y) + if (N0.getOpcode() == ISD::TRUNCATE && N1.getOpcode() == ISD::TRUNCATE && + N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() && + ((!ISD::isSignedIntSetCC(Cond) && N0->getFlags().hasNoUnsignedWrap() && + N1->getFlags().hasNoUnsignedWrap()) || + (!ISD::isUnsignedIntSetCC(Cond) && N0->getFlags().hasNoSignedWrap() && + N1->getFlags().hasNoSignedWrap())) && + isTypeDesirableForOp(ISD::SETCC, N0.getOperand(0).getValueType())) { + return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond); + } + // Could not fold it. return SDValue(); } @@ -6482,8 +6515,8 @@ SDValue TargetLowering::buildSDIVPow2WithCMov( Created.push_back(CMov.getNode()); // Divide by pow2. - SDValue SRA = - DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, VT)); + SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, CMov, + DAG.getShiftAmountConstant(Lg2, VT, DL)); // If we're dividing by a positive value, we're done. Otherwise, we must // negate the result. diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp index 705e046e..9e49ddd 100644 --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -993,7 +993,7 @@ static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, MI = MRI.getUniqueVRegDef(MO.getReg()); // And it needs to be in the trace (otherwise, it won't have a depth). if (!MI || MI->getParent() != &MBB || - ((unsigned)MI->getOpcode() != CombineOpc && CombineOpc != 0)) + (MI->getOpcode() != CombineOpc && CombineOpc != 0)) return false; // Must only used by the user we combine with. 
if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg())) diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 3c91b0e..9f525ea 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -697,7 +697,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) MaxGluedStoresPerMemcpy = 0; MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize = MaxStoresPerMemmoveOptSize = MaxLoadsPerMemcmpOptSize = 4; - HasMultipleConditionRegisters = false; HasExtractBitsInsn = false; JumpIsExpensive = JumpIsExpensiveOverride; PredictableSelectIsExpensive = false; @@ -905,6 +904,8 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::GET_FPENV, VT, Expand); setOperationAction(ISD::SET_FPENV, VT, Expand); setOperationAction(ISD::RESET_FPENV, VT, Expand); + + setOperationAction(ISD::MSTORE, VT, Expand); } // Most targets ignore the @llvm.prefetch intrinsic. diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index e9172f4..d19ef92 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -402,8 +402,8 @@ void TargetLoweringObjectFileELF::emitPersonalityValue( const MachineModuleInfo *MMI) const { SmallString<64> NameData("DW.ref."); NameData += Sym->getName(); - MCSymbolELF *Label = - cast<MCSymbolELF>(getContext().getOrCreateSymbol(NameData)); + auto *Label = + static_cast<MCSymbolELF *>(getContext().getOrCreateSymbol(NameData)); Streamer.emitSymbolAttribute(Label, MCSA_Hidden); Streamer.emitSymbolAttribute(Label, MCSA_Weak); unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_GROUP; @@ -581,7 +581,8 @@ static const MCSymbolELF *getLinkedToSymbol(const GlobalObject *GO, auto *VM = cast<ValueAsMetadata>(MD->getOperand(0).get()); auto *OtherGV = dyn_cast<GlobalValue>(VM->getValue()); - return OtherGV ? dyn_cast<MCSymbolELF>(TM.getSymbol(OtherGV)) : nullptr; + return OtherGV ? static_cast<const MCSymbolELF *>(TM.getSymbol(OtherGV)) + : nullptr; } static unsigned getEntrySizeForKind(SectionKind Kind) { @@ -1011,7 +1012,7 @@ MCSection *TargetLoweringObjectFileELF::getSectionForLSDA( (getContext().getAsmInfo()->useIntegratedAssembler() && getContext().getAsmInfo()->binutilsIsAtLeast(2, 36))) { Flags |= ELF::SHF_LINK_ORDER; - LinkedToSym = cast<MCSymbolELF>(&FnSym); + LinkedToSym = static_cast<const MCSymbolELF *>(&FnSym); } // Append the function name as the suffix like GCC, assuming @@ -2370,9 +2371,10 @@ bool TargetLoweringObjectFileXCOFF::ShouldSetSSPCanaryBitInTB( MCSymbol * TargetLoweringObjectFileXCOFF::getEHInfoTableSymbol(const MachineFunction *MF) { - MCSymbol *EHInfoSym = MF->getContext().getOrCreateSymbol( - "__ehinfo." + Twine(MF->getFunctionNumber())); - cast<MCSymbolXCOFF>(EHInfoSym)->setEHInfo(); + auto *EHInfoSym = + static_cast<MCSymbolXCOFF *>(MF->getContext().getOrCreateSymbol( + "__ehinfo." 
+ Twine(MF->getFunctionNumber()))); + EHInfoSym->setEHInfo(); return EHInfoSym; } @@ -2510,7 +2512,8 @@ MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal( if (Kind.isText()) { if (TM.getFunctionSections()) { - return cast<MCSymbolXCOFF>(getFunctionEntryPointSymbol(GO, TM)) + return static_cast<const MCSymbolXCOFF *>( + getFunctionEntryPointSymbol(GO, TM)) ->getRepresentedCsect(); } return TextSection; @@ -2713,7 +2716,7 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForTOCEntry( const MCSymbol *Sym, const TargetMachine &TM) const { const XCOFF::StorageMappingClass SMC = [](const MCSymbol *Sym, const TargetMachine &TM) { - const MCSymbolXCOFF *XSym = cast<MCSymbolXCOFF>(Sym); + auto *XSym = static_cast<const MCSymbolXCOFF *>(Sym); // The "_$TLSML" symbol for TLS local-dynamic mode requires XMC_TC, // otherwise the AIX assembler will complain. @@ -2737,8 +2740,8 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForTOCEntry( }(Sym, TM); return getContext().getXCOFFSection( - cast<MCSymbolXCOFF>(Sym)->getSymbolTableName(), SectionKind::getData(), - XCOFF::CsectProperties(SMC, XCOFF::XTY_SD)); + static_cast<const MCSymbolXCOFF *>(Sym)->getSymbolTableName(), + SectionKind::getData(), XCOFF::CsectProperties(SMC, XCOFF::XTY_SD)); } MCSection *TargetLoweringObjectFileXCOFF::getSectionForLSDA( |