Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Analysis/InstructionSimplify.cpp             |   4
-rw-r--r-- | llvm/lib/Analysis/ValueTracking.cpp                   |  98
-rw-r--r-- | llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp            |   8
-rw-r--r-- | llvm/lib/CodeGen/TargetInstrInfo.cpp                  |   6
-rw-r--r-- | llvm/lib/IR/Core.cpp                                  |   4
-rw-r--r-- | llvm/lib/MC/MCAsmInfoELF.cpp                          |   5
-rw-r--r-- | llvm/lib/MC/MCELFStreamer.cpp                         |   2
-rw-r--r-- | llvm/lib/Target/ARM/ARMInstrCDE.td                    |   1
-rw-r--r-- | llvm/lib/Target/ARM/ARMInstrMVE.td                    |   1
-rw-r--r-- | llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp  | 101
-rw-r--r-- | llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h       |   2
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp |  30
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp       |  16
-rw-r--r-- | llvm/lib/Transforms/Vectorize/VPlan.cpp               |   9
-rw-r--r-- | llvm/lib/Transforms/Vectorize/VPlanHelpers.h          |  16
-rw-r--r-- | llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp        | 123
16 files changed, 203 insertions, 223 deletions
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 07f4a8e..0d978d4 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -4164,6 +4164,10 @@ static Value *simplifyFCmpInst(CmpPredicate Pred, Value *LHS, Value *RHS,
     return ConstantInt::get(RetTy, Pred == CmpInst::FCMP_UNO);
   }
 
+  if (std::optional<bool> Res =
+          isImpliedByDomCondition(Pred, LHS, RHS, Q.CxtI, Q.DL))
+    return ConstantInt::getBool(RetTy, *Res);
+
   const APFloat *C = nullptr;
   match(RHS, m_APFloatAllowPoison(C));
   std::optional<KnownFPClass> FullKnownClassLHS;
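
With the new isImpliedByDomCondition call, a dominating branch on one fcmp can
discharge a later fcmp over the same value. A hypothetical IR sample (not part
of this commit) that simplifyFCmpInst should now fold:

  define i1 @dom(float %x) {
  entry:
    %c1 = fcmp olt float %x, 1.0
    br i1 %c1, label %taken, label %exit

  taken:                              ; %c1 is known true here
    %c2 = fcmp ult float %x, 2.0     ; implied by %x < 1.0, folds to true
    ret i1 %c2

  exit:
    ret i1 false
  }
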
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 1eda7a7..a42c061 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -39,6 +39,7 @@
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constant.h"
+#include "llvm/IR/ConstantFPRange.h"
 #include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
@@ -9474,6 +9475,69 @@ isImpliedCondICmps(CmpPredicate LPred, const Value *L0, const Value *L1,
   return std::nullopt;
 }
 
+/// Return true if LHS implies RHS (expanded to its components as "R0 RPred
+/// R1") is true. Return false if LHS implies RHS is false. Otherwise, return
+/// std::nullopt if we can't infer anything.
+static std::optional<bool>
+isImpliedCondFCmps(FCmpInst::Predicate LPred, const Value *L0, const Value *L1,
+                   FCmpInst::Predicate RPred, const Value *R0, const Value *R1,
+                   const DataLayout &DL, bool LHSIsTrue) {
+  // The rest of the logic assumes the LHS condition is true. If that's not the
+  // case, invert the predicate to make it so.
+  if (!LHSIsTrue)
+    LPred = FCmpInst::getInversePredicate(LPred);
+
+  // We can have non-canonical operands, so try to normalize any common operand
+  // to L0/R0.
+  if (L0 == R1) {
+    std::swap(R0, R1);
+    RPred = FCmpInst::getSwappedPredicate(RPred);
+  }
+  if (R0 == L1) {
+    std::swap(L0, L1);
+    LPred = FCmpInst::getSwappedPredicate(LPred);
+  }
+  if (L1 == R1) {
+    // If we have L0 == R0 and L1 == R1, then make L1/R1 the constants.
+    if (L0 != R0 || match(L0, m_ImmConstant())) {
+      std::swap(L0, L1);
+      LPred = ICmpInst::getSwappedCmpPredicate(LPred);
+      std::swap(R0, R1);
+      RPred = ICmpInst::getSwappedCmpPredicate(RPred);
+    }
+  }
+
+  // Can we infer anything when the two compares have matching operands?
+  if (L0 == R0 && L1 == R1) {
+    if ((LPred & RPred) == LPred)
+      return true;
+    if ((LPred & ~RPred) == LPred)
+      return false;
+  }
+
+  // See if we can infer anything if operand-0 matches and we have at least one
+  // constant.
+  const APFloat *L1C, *R1C;
+  if (L0 == R0 && match(L1, m_APFloat(L1C)) && match(R1, m_APFloat(R1C))) {
+    if (std::optional<ConstantFPRange> DomCR =
+            ConstantFPRange::makeExactFCmpRegion(LPred, *L1C)) {
+      if (std::optional<ConstantFPRange> ImpliedCR =
+              ConstantFPRange::makeExactFCmpRegion(RPred, *R1C)) {
+        if (ImpliedCR->contains(*DomCR))
+          return true;
+      }
+      if (std::optional<ConstantFPRange> ImpliedCR =
+              ConstantFPRange::makeExactFCmpRegion(
+                  FCmpInst::getInversePredicate(RPred), *R1C)) {
+        if (ImpliedCR->contains(*DomCR))
+          return false;
+      }
+    }
+  }
+
+  return std::nullopt;
+}
+
 /// Return true if LHS implies RHS is true. Return false if LHS implies RHS is
 /// false. Otherwise, return std::nullopt if we can't infer anything. We
 /// expect the RHS to be an icmp and the LHS to be an 'and', 'or', or a 'select'
@@ -9529,15 +9593,24 @@ llvm::isImpliedCondition(const Value *LHS, CmpPredicate RHSPred,
     LHSIsTrue = !LHSIsTrue;
 
   // Both LHS and RHS are icmps.
-  if (const auto *LHSCmp = dyn_cast<ICmpInst>(LHS))
-    return isImpliedCondICmps(LHSCmp->getCmpPredicate(), LHSCmp->getOperand(0),
-                              LHSCmp->getOperand(1), RHSPred, RHSOp0, RHSOp1,
-                              DL, LHSIsTrue);
-  const Value *V;
-  if (match(LHS, m_NUWTrunc(m_Value(V))))
-    return isImpliedCondICmps(CmpInst::ICMP_NE, V,
-                              ConstantInt::get(V->getType(), 0), RHSPred,
-                              RHSOp0, RHSOp1, DL, LHSIsTrue);
+  if (RHSOp0->getType()->getScalarType()->isIntOrPtrTy()) {
+    if (const auto *LHSCmp = dyn_cast<ICmpInst>(LHS))
+      return isImpliedCondICmps(LHSCmp->getCmpPredicate(),
+                                LHSCmp->getOperand(0), LHSCmp->getOperand(1),
+                                RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue);
+    const Value *V;
+    if (match(LHS, m_NUWTrunc(m_Value(V))))
+      return isImpliedCondICmps(CmpInst::ICMP_NE, V,
+                                ConstantInt::get(V->getType(), 0), RHSPred,
+                                RHSOp0, RHSOp1, DL, LHSIsTrue);
+  } else {
+    assert(RHSOp0->getType()->isFPOrFPVectorTy() &&
+           "Expected floating point type only!");
+    if (const auto *LHSCmp = dyn_cast<FCmpInst>(LHS))
+      return isImpliedCondFCmps(LHSCmp->getPredicate(), LHSCmp->getOperand(0),
+                                LHSCmp->getOperand(1), RHSPred, RHSOp0, RHSOp1,
+                                DL, LHSIsTrue);
+  }
 
   /// The LHS should be an 'or', 'and', or a 'select' instruction. We expect
   /// the RHS to be an icmp.
@@ -9574,6 +9647,13 @@ std::optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS,
       return InvertRHS ? !*Implied : *Implied;
     return std::nullopt;
   }
+  if (const FCmpInst *RHSCmp = dyn_cast<FCmpInst>(RHS)) {
+    if (auto Implied = isImpliedCondition(
+            LHS, RHSCmp->getPredicate(), RHSCmp->getOperand(0),
+            RHSCmp->getOperand(1), DL, LHSIsTrue, Depth))
+      return InvertRHS ? !*Implied : *Implied;
+    return std::nullopt;
+  }
 
   const Value *V;
   if (match(RHS, m_NUWTrunc(m_Value(V)))) {
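
The matching-operand case relies on FCmpInst predicates being 4-bit masks over
{EQ, GT, LT, UNO}: FCMP_OLT is 0b0100 and FCMP_OLE is 0b0101, so
(OLT & OLE) == OLT and "a < b" implies "a <= b"; likewise OGE is 0b0011, so
(OLT & ~OGE) == OLT and "a < b" implies "a >= b" is false. A hypothetical IR
pair the helper can now relate (whether a given fold fires depends on the
caller):

  define i1 @implies(float %a, float %b) {
    %lt = fcmp olt float %a, %b
    %le = fcmp ole float %a, %b
    %r = and i1 %lt, %le        ; %lt implies %le, so %r can fold to %lt
    ret i1 %r
  }
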
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 11efe49..10df9c1 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2866,9 +2866,11 @@ bool AsmPrinter::doFinalization(Module &M) {
   // If we don't have any trampolines, then we don't require stack memory
   // to be executable. Some targets have a directive to declare this.
   Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline");
-  if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty())
-    if (MCSection *S = MAI->getNonexecutableStackSection(OutContext))
-      OutStreamer->switchSection(S);
+  bool HasTrampolineUses =
+      InitTrampolineIntrinsic && !InitTrampolineIntrinsic->use_empty();
+  MCSection *S = MAI->getStackSection(OutContext, /*Exec=*/HasTrampolineUses);
+  if (S)
+    OutStreamer->switchSection(S);
 
   if (TM.Options.EmitAddrsig) {
     // Emit address-significance attributes for all globals.
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 2f3b7a2..3c41bbe 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -1657,12 +1657,6 @@ bool TargetInstrInfo::isReMaterializableImpl(
     // same virtual register, though.
     if (MO.isDef() && Reg != DefReg)
       return false;
-
-    // Don't allow any virtual-register uses. Rematting an instruction with
-    // virtual register uses would length the live ranges of the uses, which
-    // is not necessarily a good idea, certainly not "trivial".
-    if (MO.isUse())
-      return false;
   }
 
   // Everything checked out.
diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp
index 8b5965b..df0c85b 100644
--- a/llvm/lib/IR/Core.cpp
+++ b/llvm/lib/IR/Core.cpp
@@ -2994,6 +2994,8 @@ LLVMValueRef LLVMIsATerminatorInst(LLVMValueRef Inst) {
 
 LLVMDbgRecordRef LLVMGetFirstDbgRecord(LLVMValueRef Inst) {
   Instruction *Instr = unwrap<Instruction>(Inst);
+  if (!Instr->DebugMarker)
+    return nullptr;
   auto I = Instr->DebugMarker->StoredDbgRecords.begin();
   if (I == Instr->DebugMarker->StoredDbgRecords.end())
     return nullptr;
@@ -3002,6 +3004,8 @@ LLVMDbgRecordRef LLVMGetLastDbgRecord(LLVMValueRef Inst) {
   Instruction *Instr = unwrap<Instruction>(Inst);
+  if (!Instr->DebugMarker)
+    return nullptr;
   auto I = Instr->DebugMarker->StoredDbgRecords.rbegin();
   if (I == Instr->DebugMarker->StoredDbgRecords.rend())
     return nullptr;
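
Callers of the C API now get a consistent null sentinel; previously an
instruction with no DebugMarker at all dereferenced a null pointer. A minimal
C sketch (countDbgRecords is a hypothetical helper, not part of the API; it
assumes the existing LLVMGetNextDbgRecord iterator):

  #include "llvm-c/Core.h"

  /* Count the debug records attached to an instruction. Safe even for an
     instruction with no DebugMarker: the getter now returns NULL instead
     of crashing. */
  static unsigned countDbgRecords(LLVMValueRef Inst) {
    unsigned N = 0;
    for (LLVMDbgRecordRef R = LLVMGetFirstDbgRecord(Inst); R;
         R = LLVMGetNextDbgRecord(R))
      ++N;
    return N;
  }
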
diff --git a/llvm/lib/MC/MCAsmInfoELF.cpp b/llvm/lib/MC/MCAsmInfoELF.cpp
index cdae9d7..98090d3 100644
--- a/llvm/lib/MC/MCAsmInfoELF.cpp
+++ b/llvm/lib/MC/MCAsmInfoELF.cpp
@@ -27,12 +27,13 @@ using namespace llvm;
 
 void MCAsmInfoELF::anchor() {}
 
-MCSection *MCAsmInfoELF::getNonexecutableStackSection(MCContext &Ctx) const {
+MCSection *MCAsmInfoELF::getStackSection(MCContext &Ctx, bool Exec) const {
   // Solaris doesn't know/doesn't care about .note.GNU-stack sections, so
   // don't emit them.
   if (Ctx.getTargetTriple().isOSSolaris())
     return nullptr;
-  return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS, 0);
+  return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS,
+                           Exec ? ELF::SHF_EXECINSTR : 0U);
 }
 
 bool MCAsmInfoELF::useCodeAlign(const MCSection &Sec) const {
diff --git a/llvm/lib/MC/MCELFStreamer.cpp b/llvm/lib/MC/MCELFStreamer.cpp
index 2881d7c..1bc1b92 100644
--- a/llvm/lib/MC/MCELFStreamer.cpp
+++ b/llvm/lib/MC/MCELFStreamer.cpp
@@ -54,7 +54,7 @@ void MCELFStreamer::initSections(bool NoExecStack, const MCSubtargetInfo &STI) {
                 &STI);
 
   if (NoExecStack)
-    switchSection(Ctx.getAsmInfo()->getNonexecutableStackSection(Ctx));
+    switchSection(Ctx.getAsmInfo()->getStackSection(Ctx, /*Exec=*/false));
 }
 
 void MCELFStreamer::emitLabel(MCSymbol *S, SMLoc Loc) {
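
For context, the only observable difference is the section's flags word. In
directive form the module-end output becomes one of the following (illustrative
assembly, not from the commit):

  	.section	".note.GNU-stack","",@progbits
  	.section	".note.GNU-stack","x",@progbits

so a module that actually uses llvm.init.trampoline now requests an executable
stack explicitly via SHF_EXECINSTR instead of merely omitting the note.
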
diff --git a/llvm/lib/Target/ARM/ARMInstrCDE.td b/llvm/lib/Target/ARM/ARMInstrCDE.td
index 54e27a6..f4326de 100644
--- a/llvm/lib/Target/ARM/ARMInstrCDE.td
+++ b/llvm/lib/Target/ARM/ARMInstrCDE.td
@@ -268,6 +268,7 @@ class CDE_Vec_Instr<bit acc, dag oops, dag iops, string asm, string cstr,
                   !con(iops, (ins vpred:$vp)), asm,
                   !strconcat(cstr, vpred.vpred_constraint)>,
   CDE_RequiresQReg {
+  bits<0> vp;
 }
 
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 9dffd94..e244134 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -409,6 +409,7 @@ class MVE_p<dag oops, dag iops, InstrItinClass itin, string iname,
             !strconcat(iname, "${vp}",
                        !if(!eq(suffix, ""), "", !strconcat(".", suffix))),
             ops, !strconcat(cstr, vpred.vpred_constraint), vecsize, pattern> {
+  bits<0> vp;
   let Inst{31-29} = 0b111;
   let Inst{27-26} = 0b11;
 }
diff --git a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index d358913..e67db8e 100644
--- a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -639,6 +639,43 @@ static DecodeStatus DecodeCCOutOperand(MCInst &Inst,
   return MCDisassembler::Success;
 }
 
+static DecodeStatus DecodeVpredNOperand(MCInst &Inst,
+                                        const MCDisassembler *Decoder) {
+  const auto *D = static_cast<const ARMDisassembler *>(Decoder);
+  unsigned VCC = D->VPTBlock.getVPTPred();
+  MCRegister CondReg = VCC == ARMVCC::None ? ARM::NoRegister : ARM::P0;
+
+  Inst.addOperand(MCOperand::createImm(VCC));             // $cond
+  Inst.addOperand(MCOperand::createReg(CondReg));         // $cond_reg
+  Inst.addOperand(MCOperand::createReg(ARM::NoRegister)); // $tp_reg
+
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeVpredROperand(MCInst &Inst,
+                                        const MCDisassembler *Decoder) {
+  const auto *D = static_cast<const ARMDisassembler *>(Decoder);
+  unsigned VCC = D->VPTBlock.getVPTPred();
+  MCRegister CondReg = VCC == ARMVCC::None ? ARM::NoRegister : ARM::P0;
+
+  Inst.addOperand(MCOperand::createImm(VCC));             // $cond
+  Inst.addOperand(MCOperand::createReg(CondReg));         // $cond_reg
+  Inst.addOperand(MCOperand::createReg(ARM::NoRegister)); // $tp_reg
+
+  // The last sub-operand ($inactive) is tied to an output operand.
+  // The output operand has already been decoded, so just copy it.
+  const MCInstrDesc &MCID = D->MCII->get(Inst.getOpcode());
+  unsigned InactiveOpIdx = Inst.getNumOperands();
+  int TiedOpIdx = MCID.getOperandConstraint(InactiveOpIdx, MCOI::TIED_TO);
+  assert(TiedOpIdx >= 0 &&
+         "Inactive register in vpred_r is not tied to an output!");
+
+  // Make a copy of the operand to ensure it is not invalidated when MI grows.
+  Inst.addOperand(MCOperand(Inst.getOperand(TiedOpIdx))); // $inactive
+
+  return MCDisassembler::Success;
+}
+
 static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Val,
                                           uint64_t Address,
                                           const MCDisassembler *Decoder) {
@@ -2777,6 +2814,7 @@ static DecodeStatus DecodeMVEModImmInstruction(MCInst &Inst, unsigned Insn,
 
   Inst.addOperand(MCOperand::createImm(imm));
 
+  Check(S, DecodeVpredROperand(Inst, Decoder));
   return S;
 }
 
@@ -2802,6 +2840,7 @@ static DecodeStatus DecodeMVEVADCInstruction(MCInst &Inst, unsigned Insn,
   if (!fieldFromInstruction(Insn, 12, 1)) // I bit clear => need input FPSCR
     Inst.addOperand(MCOperand::createReg(ARM::FPSCR_NZCV));
 
+  Check(S, DecodeVpredROperand(Inst, Decoder));
   return S;
 }
 
@@ -5466,30 +5505,6 @@ static DecodeStatus DecodeVPTMaskOperand(MCInst &Inst, unsigned Val,
   return S;
 }
 
-static DecodeStatus DecodeVpredROperand(MCInst &Inst, unsigned RegNo,
-                                        uint64_t Address,
-                                        const MCDisassembler *Decoder) {
-  // The vpred_r operand type includes an MQPR register field derived
-  // from the encoding. But we don't actually want to add an operand
-  // to the MCInst at this stage, because AddThumbPredicate will do it
-  // later, and will infer the register number from the TIED_TO
-  // constraint. So this is a deliberately empty decoder method that
-  // will inhibit the auto-generated disassembly code from adding an
-  // operand at all.
-  return MCDisassembler::Success;
-}
-
-[[maybe_unused]] static DecodeStatus
-DecodeVpredNOperand(MCInst &Inst, unsigned RegNo, uint64_t Address,
-                    const MCDisassembler *Decoder) {
-  // Similar to above, we want to ensure that no operands are added for the
-  // vpred operands. (This is marked "maybe_unused" for the moment; because
-  // DecoderEmitter currently (wrongly) omits operands with no instruction bits,
-  // the decoder doesn't actually call it yet. That will be addressed in a
-  // future change.)
-  return MCDisassembler::Success;
-}
-
 static DecodeStatus DecodeRestrictedIPredicateOperand(MCInst &Inst, unsigned Val,
                                                       uint64_t Address,
                                                       const MCDisassembler *Decoder) {
@@ -5668,6 +5683,7 @@ DecodeMVE_MEM_pre(MCInst &Inst, unsigned Val, uint64_t Address,
   if (!Check(S, AddrDecoder(Inst, addr, Address, Decoder)))
     return MCDisassembler::Fail;
 
+  Check(S, DecodeVpredNOperand(Inst, Decoder));
   return S;
 }
 
@@ -5871,7 +5887,7 @@ static DecodeStatus DecodeMVEVCVTt1fp(MCInst &Inst, unsigned Insn,
     return MCDisassembler::Fail;
   if (!Check(S, DecodeVCVTImmOperand(Inst, imm6, Address, Decoder)))
     return MCDisassembler::Fail;
-
+  Check(S, DecodeVpredROperand(Inst, Decoder));
   return S;
 }
 
@@ -5906,6 +5922,7 @@ static DecodeStatus DecodeMVEVCMP(MCInst &Inst, unsigned Insn, uint64_t Address,
   if (!Check(S, predicate_decoder(Inst, fc, Address, Decoder)))
     return MCDisassembler::Fail;
 
+  Check(S, DecodeVpredNOperand(Inst, Decoder));
   return S;
 }
 
@@ -5916,6 +5933,7 @@ static DecodeStatus DecodeMveVCTP(MCInst &Inst, unsigned Insn, uint64_t Address,
   unsigned Rn = fieldFromInstruction(Insn, 16, 4);
   if (!Check(S, DecoderGPRRegisterClass(Inst, Rn, Address, Decoder)))
     return MCDisassembler::Fail;
+  Check(S, DecodeVpredNOperand(Inst, Decoder));
   return S;
 }
 
@@ -5925,6 +5943,7 @@ static DecodeStatus DecodeMVEVPNOT(MCInst &Inst, unsigned Insn,
   DecodeStatus S = MCDisassembler::Success;
   Inst.addOperand(MCOperand::createReg(ARM::VPR));
   Inst.addOperand(MCOperand::createReg(ARM::VPR));
+  Check(S, DecodeVpredNOperand(Inst, Decoder));
   return S;
 }
 
@@ -6199,15 +6218,13 @@ ARMDisassembler::AddThumbPredicate(MCInst &MI) const {
       (isVectorPredicable(MI) && ITBlock.instrInITBlock()))
     S = SoftFail;
 
-  // If we're in an IT/VPT block, base the predicate on that.  Otherwise,
+  // If we're in an IT block, base the predicate on that.  Otherwise,
   // assume a predicate of AL.
   unsigned CC = ARMCC::AL;
-  unsigned VCC = ARMVCC::None;
   if (ITBlock.instrInITBlock()) {
     CC = ITBlock.getITCC();
     ITBlock.advanceITState();
   } else if (VPTBlock.instrInVPTBlock()) {
-    VCC = VPTBlock.getVPTPred();
     VPTBlock.advanceVPTState();
   }
 
@@ -6230,34 +6247,6 @@ ARMDisassembler::AddThumbPredicate(MCInst &MI) const {
     Check(S, SoftFail);
   }
 
-  MCInst::iterator VCCI = MI.begin();
-  unsigned VCCPos;
-  for (VCCPos = 0; VCCPos < MCID.NumOperands; ++VCCPos, ++VCCI) {
-    if (ARM::isVpred(MCID.operands()[VCCPos].OperandType) || VCCI == MI.end())
-      break;
-  }
-
-  if (isVectorPredicable(MI)) {
-    VCCI = MI.insert(VCCI, MCOperand::createImm(VCC));
-    ++VCCI;
-    if (VCC == ARMVCC::None)
-      VCCI = MI.insert(VCCI, MCOperand::createReg(0));
-    else
-      VCCI = MI.insert(VCCI, MCOperand::createReg(ARM::P0));
-    ++VCCI;
-    VCCI = MI.insert(VCCI, MCOperand::createReg(0));
-    ++VCCI;
-    if (MCID.operands()[VCCPos].OperandType == ARM::OPERAND_VPRED_R) {
-      int TiedOp = MCID.getOperandConstraint(VCCPos + 3, MCOI::TIED_TO);
-      assert(TiedOp >= 0 &&
-             "Inactive register in vpred_r is not tied to an output!");
-      // Copy the operand to ensure it's not invalidated when MI grows.
-      MI.insert(VCCI, MCOperand(MI.getOperand(TiedOp)));
-    }
-  } else if (VCC != ARMVCC::None) {
-    Check(S, SoftFail);
-  }
-
   return S;
 }
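
For context, the relocated decoders fire while disassembling inside a VPT
block, where the vpred sub-operands carry the block's predicate instead of
being patched in afterwards by AddThumbPredicate. A hand-written illustration
(not from the commit) of the disassembly they apply to:

  vpt.f32   ge, q0, q1     @ opens a VPT block with one 'then' slot
  vaddt.f32 q2, q0, q1     @ $cond decodes as ARMVCC::Then, $cond_reg = p0
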
diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
index dfd896f..8d8066a 100644
--- a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
+++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
@@ -49,7 +49,7 @@ public:
     DwarfUsesRelocationsAcrossSections = enable;
   }
 
-  MCSection *getNonexecutableStackSection(MCContext &Ctx) const override {
+  MCSection *getStackSection(MCContext &Ctx, bool Exec) const override {
     return nullptr;
  }
 };
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 3df448d..8f60e50 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -17,6 +17,7 @@
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/CmpInstAnalysis.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/OverflowInstAnalysis.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Analysis/VectorUtils.h"
@@ -42,6 +43,7 @@
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Transforms/InstCombine/InstCombiner.h"
 #include <cassert>
+#include <optional>
 #include <utility>
 
 #define DEBUG_TYPE "instcombine"
@@ -1451,10 +1453,16 @@ Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel,
     return nullptr;
   };
 
-  if (Instruction *R = ReplaceOldOpWithNewOp(CmpLHS, CmpRHS))
-    return R;
-  if (Instruction *R = ReplaceOldOpWithNewOp(CmpRHS, CmpLHS))
-    return R;
+  bool CanReplaceCmpLHSWithRHS = canReplacePointersIfEqual(CmpLHS, CmpRHS, DL);
+  if (CanReplaceCmpLHSWithRHS) {
+    if (Instruction *R = ReplaceOldOpWithNewOp(CmpLHS, CmpRHS))
+      return R;
+  }
+  bool CanReplaceCmpRHSWithLHS = canReplacePointersIfEqual(CmpRHS, CmpLHS, DL);
+  if (CanReplaceCmpRHSWithLHS) {
+    if (Instruction *R = ReplaceOldOpWithNewOp(CmpRHS, CmpLHS))
+      return R;
+  }
 
   auto *FalseInst = dyn_cast<Instruction>(FalseVal);
   if (!FalseInst)
@@ -1469,12 +1477,14 @@ Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel,
   // Example:
   // (X == 42) ? 43 : (X + 1) --> (X == 42) ? (X + 1) : (X + 1) --> X + 1
   SmallVector<Instruction *> DropFlags;
-  if (simplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, SQ,
-                             /* AllowRefinement */ false,
-                             &DropFlags) == TrueVal ||
-      simplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, SQ,
-                             /* AllowRefinement */ false,
-                             &DropFlags) == TrueVal) {
+  if ((CanReplaceCmpLHSWithRHS &&
+       simplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, SQ,
+                              /* AllowRefinement */ false,
+                              &DropFlags) == TrueVal) ||
+      (CanReplaceCmpRHSWithLHS &&
+       simplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, SQ,
+                              /* AllowRefinement */ false,
+                              &DropFlags) == TrueVal)) {
     for (Instruction *I : DropFlags) {
       I->dropPoisonGeneratingAnnotations();
       Worklist.add(I);
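
Pointer equality (icmp eq) compares addresses only, so substituting one
pointer for the other in the select arms is not always provenance-safe;
canReplacePointersIfEqual now gates both replacement directions. A
hypothetical IR case the fold must treat carefully:

  define i32 @sel(ptr %p, ptr %q) {
    %cmp = icmp eq ptr %p, %q
    %sel = select i1 %cmp, ptr %p, ptr %q
    ; rewriting %sel to %q needs canReplacePointersIfEqual(%p, %q, DL):
    ; %p and %q may compare equal yet carry different provenance
    %v = load i32, ptr %sel
    ret i32 %v
  }
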
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index cb6bfb2..56a3d6d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3903,8 +3903,7 @@ void LoopVectorizationPlanner::emitInvalidCostRemarks(
     if (VF.isScalar())
       continue;
 
-    VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind,
-                          *CM.PSE.getSE());
+    VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind);
     precomputeCosts(*Plan, VF, CostCtx);
     auto Iter = vp_depth_first_deep(Plan->getVectorLoopRegion()->getEntry());
     for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
@@ -4161,8 +4160,7 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
 
     // Add on other costs that are modelled in VPlan, but not in the legacy
     // cost model.
-    VPCostContext CostCtx(CM.TTI, *CM.TLI, *P, CM, CM.CostKind,
-                          *CM.PSE.getSE());
+    VPCostContext CostCtx(CM.TTI, *CM.TLI, *P, CM, CM.CostKind);
     VPRegionBlock *VectorRegion = P->getVectorLoopRegion();
     assert(VectorRegion && "Expected to have a vector region!");
     for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
@@ -6854,7 +6852,7 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
 
 InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan,
                                                ElementCount VF) const {
-  VPCostContext CostCtx(CM.TTI, *CM.TLI, Plan, CM, CM.CostKind, *PSE.getSE());
+  VPCostContext CostCtx(CM.TTI, *CM.TLI, Plan, CM, CM.CostKind);
   InstructionCost Cost = precomputeCosts(Plan, VF, CostCtx);
 
   // Now compute and add the VPlan-based cost.
@@ -7087,8 +7085,7 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
   // simplifications not accounted for in the legacy cost model. If that's the
   // case, don't trigger the assertion, as the extra simplifications may cause a
   // different VF to be picked by the VPlan-based cost model.
-  VPCostContext CostCtx(CM.TTI, *CM.TLI, BestPlan, CM, CM.CostKind,
-                        *CM.PSE.getSE());
+  VPCostContext CostCtx(CM.TTI, *CM.TLI, BestPlan, CM, CM.CostKind);
   precomputeCosts(BestPlan, BestFactor.Width, CostCtx);
   // Verify that the VPlan-based and legacy cost models agree, except for VPlans
   // with early exits and plans with additional VPlan simplifications. The
@@ -8624,8 +8621,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
   // TODO: Enable following transform when the EVL-version of extended-reduction
   // and mulacc-reduction are implemented.
   if (!CM.foldTailWithEVL()) {
-    VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind,
-                          *CM.PSE.getSE());
+    VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind);
     VPlanTransforms::runPass(VPlanTransforms::convertToAbstractRecipes, *Plan,
                              CostCtx, Range);
   }
@@ -10079,7 +10075,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
   bool ForceVectorization =
       Hints.getForce() == LoopVectorizeHints::FK_Enabled;
   VPCostContext CostCtx(CM.TTI, *CM.TLI, LVP.getPlanFor(VF.Width), CM,
-                        CM.CostKind, *CM.PSE.getSE());
+                        CM.CostKind);
   if (!ForceVectorization &&
       !isOutsideLoopWorkProfitable(Checks, VF, L, PSE, CostCtx,
                                    LVP.getPlanFor(VF.Width), SEL,
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 2555ebe..07b191a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -1772,8 +1772,7 @@ VPCostContext::getOperandInfo(VPValue *V) const {
 }
 
 InstructionCost VPCostContext::getScalarizationOverhead(
-    Type *ResultTy, ArrayRef<const VPValue *> Operands, ElementCount VF,
-    bool AlwaysIncludeReplicatingR) {
+    Type *ResultTy, ArrayRef<const VPValue *> Operands, ElementCount VF) {
   if (VF.isScalar())
     return 0;
 
@@ -1793,11 +1792,7 @@ InstructionCost VPCostContext::getScalarizationOverhead(
   SmallPtrSet<const VPValue *, 4> UniqueOperands;
   SmallVector<Type *> Tys;
   for (auto *Op : Operands) {
-    if (Op->isLiveIn() ||
-        (!AlwaysIncludeReplicatingR &&
-         isa<VPReplicateRecipe, VPPredInstPHIRecipe>(Op)) ||
-        (isa<VPReplicateRecipe>(Op) &&
-         cast<VPReplicateRecipe>(Op)->getOpcode() == Instruction::Load) ||
+    if (Op->isLiveIn() || isa<VPReplicateRecipe, VPPredInstPHIRecipe>(Op) ||
         !UniqueOperands.insert(Op).second)
       continue;
     Tys.push_back(toVectorizedTy(Types.inferScalarType(Op), VF));
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
index 1580a3b..fc1a09e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
@@ -349,14 +349,12 @@ struct VPCostContext {
   LoopVectorizationCostModel &CM;
   SmallPtrSet<Instruction *, 8> SkipCostComputation;
   TargetTransformInfo::TargetCostKind CostKind;
-  ScalarEvolution &SE;
 
   VPCostContext(const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI,
                 const VPlan &Plan, LoopVectorizationCostModel &CM,
-                TargetTransformInfo::TargetCostKind CostKind,
-                ScalarEvolution &SE)
+                TargetTransformInfo::TargetCostKind CostKind)
       : TTI(TTI), TLI(TLI), Types(Plan), LLVMCtx(Plan.getContext()), CM(CM),
-        CostKind(CostKind), SE(SE) {}
+        CostKind(CostKind) {}
 
   /// Return the cost for \p UI with \p VF using the legacy cost model as
   /// fallback until computing the cost of all recipes migrates to VPlan.
@@ -376,12 +374,10 @@ struct VPCostContext {
 
   /// Estimate the overhead of scalarizing a recipe with result type \p ResultTy
   /// and \p Operands with \p VF. This is a convenience wrapper for the
-  /// type-based getScalarizationOverhead API. If \p AlwaysIncludeReplicatingR
-  /// is true, always compute the cost of scalarizing replicating operands.
-  InstructionCost
-  getScalarizationOverhead(Type *ResultTy, ArrayRef<const VPValue *> Operands,
-                           ElementCount VF,
-                           bool AlwaysIncludeReplicatingR = false);
+  /// type-based getScalarizationOverhead API.
+  InstructionCost getScalarizationOverhead(Type *ResultTy,
+                                           ArrayRef<const VPValue *> Operands,
+                                           ElementCount VF);
 };
 
 /// This class can be used to assign names to VPValues. For VPValues without
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index a88cffc..67b9244 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -40,7 +40,6 @@
 #include <cassert>
 
 using namespace llvm;
-using namespace llvm::VPlanPatternMatch;
 
 using VectorParts = SmallVector<Value *, 2>;
 
@@ -304,6 +303,7 @@ VPPartialReductionRecipe::computeCost(ElementCount VF,
   VPRecipeBase *OpR = Op->getDefiningRecipe();
 
   // If the partial reduction is predicated, a select will be operand 0
+  using namespace llvm::VPlanPatternMatch;
   if (match(getOperand(1), m_Select(m_VPValue(), m_VPValue(Op), m_VPValue()))) {
     OpR = Op->getDefiningRecipe();
   }
@@ -1963,6 +1963,7 @@ InstructionCost VPWidenSelectRecipe::computeCost(ElementCount VF,
   Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
 
   VPValue *Op0, *Op1;
+  using namespace llvm::VPlanPatternMatch;
   if (!ScalarCond && ScalarTy->getScalarSizeInBits() == 1 &&
       (match(this, m_LogicalAnd(m_VPValue(Op0), m_VPValue(Op1))) ||
        match(this, m_LogicalOr(m_VPValue(Op0), m_VPValue(Op1))))) {
@@ -3110,62 +3111,6 @@ bool VPReplicateRecipe::shouldPack() const {
   });
 }
 
-/// Returns true if \p Ptr is a pointer computation for which the legacy cost
-/// model computes a SCEV expression when computing the address cost.
-static bool shouldUseAddressAccessSCEV(const VPValue *Ptr) {
-  auto *PtrR = Ptr->getDefiningRecipe();
-  if (!PtrR || !((isa<VPReplicateRecipe>(PtrR) &&
-                  cast<VPReplicateRecipe>(PtrR)->getOpcode() ==
-                      Instruction::GetElementPtr) ||
-                 isa<VPWidenGEPRecipe>(PtrR) ||
-                 match(Ptr, m_GetElementPtr(m_VPValue(), m_VPValue()))))
-    return false;
-
-  // We are looking for a GEP where all indices are either loop invariant or
-  // inductions.
-  for (VPValue *Opd : drop_begin(PtrR->operands())) {
-    if (!Opd->isDefinedOutsideLoopRegions() &&
-        !isa<VPScalarIVStepsRecipe, VPWidenIntOrFpInductionRecipe>(Opd))
-      return false;
-  }
-
-  return true;
-}
-
-/// Returns true if \p V is used as part of the address of another load or
-/// store.
-static bool isUsedByLoadStoreAddress(const VPUser *V) {
-  SmallPtrSet<const VPUser *, 4> Seen;
-  SmallVector<const VPUser *> WorkList = {V};
-
-  while (!WorkList.empty()) {
-    auto *Cur = dyn_cast<VPSingleDefRecipe>(WorkList.pop_back_val());
-    if (!Cur || !Seen.insert(Cur).second)
-      continue;
-
-    for (VPUser *U : Cur->users()) {
-      if (auto *InterleaveR = dyn_cast<VPInterleaveBase>(U))
-        if (InterleaveR->getAddr() == Cur)
-          return true;
-      if (auto *RepR = dyn_cast<VPReplicateRecipe>(U)) {
-        if (RepR->getOpcode() == Instruction::Load &&
-            RepR->getOperand(0) == Cur)
-          return true;
-        if (RepR->getOpcode() == Instruction::Store &&
-            RepR->getOperand(1) == Cur)
-          return true;
-      }
-      if (auto *MemR = dyn_cast<VPWidenMemoryRecipe>(U)) {
-        if (MemR->getAddr() == Cur && MemR->isConsecutive())
-          return true;
-      }
-    }
-
-    append_range(WorkList, cast<VPSingleDefRecipe>(Cur)->users());
-  }
-  return false;
-}
-
 InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
                                                VPCostContext &Ctx) const {
   Instruction *UI = cast<Instruction>(getUnderlyingValue());
@@ -3273,59 +3218,21 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
   }
   case Instruction::Load:
   case Instruction::Store: {
-    if (VF.isScalable() && !isSingleScalar())
-      return InstructionCost::getInvalid();
-
+    if (isSingleScalar()) {
+      bool IsLoad = UI->getOpcode() == Instruction::Load;
+      Type *ValTy = Ctx.Types.inferScalarType(IsLoad ? this : getOperand(0));
+      Type *ScalarPtrTy = Ctx.Types.inferScalarType(getOperand(IsLoad ? 0 : 1));
+      const Align Alignment = getLoadStoreAlignment(UI);
+      unsigned AS = getLoadStoreAddressSpace(UI);
+      TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(UI->getOperand(0));
+      InstructionCost ScalarMemOpCost = Ctx.TTI.getMemoryOpCost(
+          UI->getOpcode(), ValTy, Alignment, AS, Ctx.CostKind, OpInfo, UI);
+      return ScalarMemOpCost + Ctx.TTI.getAddressComputationCost(
+                                   ScalarPtrTy, nullptr, nullptr, Ctx.CostKind);
+    }
     // TODO: See getMemInstScalarizationCost for how to handle replicating and
     // predicated cases.
-    const VPRegionBlock *ParentRegion = getParent()->getParent();
-    if (ParentRegion && ParentRegion->isReplicator())
-      break;
-
-    bool IsLoad = UI->getOpcode() == Instruction::Load;
-    const VPValue *PtrOp = getOperand(!IsLoad);
-    // TODO: Handle cases where we need to pass a SCEV to
-    // getAddressComputationCost.
-    if (shouldUseAddressAccessSCEV(PtrOp))
-      break;
-
-    Type *ValTy = Ctx.Types.inferScalarType(IsLoad ? this : getOperand(0));
-    Type *ScalarPtrTy = Ctx.Types.inferScalarType(PtrOp);
-    const Align Alignment = getLoadStoreAlignment(UI);
-    unsigned AS = getLoadStoreAddressSpace(UI);
-    TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(UI->getOperand(0));
-    InstructionCost ScalarMemOpCost = Ctx.TTI.getMemoryOpCost(
-        UI->getOpcode(), ValTy, Alignment, AS, Ctx.CostKind, OpInfo);
-
-    Type *PtrTy = isSingleScalar() ? ScalarPtrTy : toVectorTy(ScalarPtrTy, VF);
-    bool UsedByLoadStoreAddress = isUsedByLoadStoreAddress(this);
-    InstructionCost ScalarCost =
-        ScalarMemOpCost + Ctx.TTI.getAddressComputationCost(
-                              PtrTy, UsedByLoadStoreAddress ? nullptr : &Ctx.SE,
-                              nullptr, Ctx.CostKind);
-    if (isSingleScalar())
-      return ScalarCost;
-
-    SmallVector<const VPValue *> OpsToScalarize;
-    Type *ResultTy = Type::getVoidTy(PtrTy->getContext());
-    // Set ResultTy and OpsToScalarize, if scalarization is needed. Currently we
-    // don't assign scalarization overhead in general, if the target prefers
-    // vectorized addressing or the loaded value is used as part of an address
-    // of another load or store.
-    bool PreferVectorizedAddressing = Ctx.TTI.prefersVectorizedAddressing();
-    if (PreferVectorizedAddressing || !UsedByLoadStoreAddress) {
-      bool EfficientVectorLoadStore =
-          Ctx.TTI.supportsEfficientVectorElementLoadStore();
-      if (!(IsLoad && !PreferVectorizedAddressing) &&
-          !(!IsLoad && EfficientVectorLoadStore))
-        append_range(OpsToScalarize, operands());
-
-      if (!EfficientVectorLoadStore)
-        ResultTy = Ctx.Types.inferScalarType(this);
-    }
-
-    return (ScalarCost * VF.getFixedValue()) +
-           Ctx.getScalarizationOverhead(ResultTy, OpsToScalarize, VF, true);
+    break;
   }
   }