Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Analysis/InstructionSimplify.cpp             |   4
-rw-r--r--  llvm/lib/Analysis/ValueTracking.cpp                   |  98
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp            |   8
-rw-r--r--  llvm/lib/CodeGen/TargetInstrInfo.cpp                  |   6
-rw-r--r--  llvm/lib/IR/Core.cpp                                  |   4
-rw-r--r--  llvm/lib/MC/MCAsmInfoELF.cpp                          |   5
-rw-r--r--  llvm/lib/MC/MCELFStreamer.cpp                         |   2
-rw-r--r--  llvm/lib/Target/ARM/ARMInstrCDE.td                    |   1
-rw-r--r--  llvm/lib/Target/ARM/ARMInstrMVE.td                    |   1
-rw-r--r--  llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp  | 101
-rw-r--r--  llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h       |   2
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp |  30
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp       |  16
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.cpp               |   9
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanHelpers.h          |  16
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp        | 123
16 files changed, 203 insertions(+), 223 deletions(-)
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 07f4a8e..0d978d4 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -4164,6 +4164,10 @@ static Value *simplifyFCmpInst(CmpPredicate Pred, Value *LHS, Value *RHS,
return ConstantInt::get(RetTy, Pred == CmpInst::FCMP_UNO);
}
+ if (std::optional<bool> Res =
+ isImpliedByDomCondition(Pred, LHS, RHS, Q.CxtI, Q.DL))
+ return ConstantInt::getBool(RetTy, *Res);
+
const APFloat *C = nullptr;
match(RHS, m_APFloatAllowPoison(C));
std::optional<KnownFPClass> FullKnownClassLHS;
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 1eda7a7..a42c061 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -39,6 +39,7 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
+#include "llvm/IR/ConstantFPRange.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
@@ -9474,6 +9475,69 @@ isImpliedCondICmps(CmpPredicate LPred, const Value *L0, const Value *L1,
return std::nullopt;
}
+/// Return true if LHS implies RHS (expanded to its components as "R0 RPred R1")
+/// is true. Return false if LHS implies RHS is false. Otherwise, return
+/// std::nullopt if we can't infer anything.
+static std::optional<bool>
+isImpliedCondFCmps(FCmpInst::Predicate LPred, const Value *L0, const Value *L1,
+ FCmpInst::Predicate RPred, const Value *R0, const Value *R1,
+ const DataLayout &DL, bool LHSIsTrue) {
+ // The rest of the logic assumes the LHS condition is true. If that's not the
+ // case, invert the predicate to make it so.
+ if (!LHSIsTrue)
+ LPred = FCmpInst::getInversePredicate(LPred);
+
+ // We can have non-canonical operands, so try to normalize any common operand
+ // to L0/R0.
+ if (L0 == R1) {
+ std::swap(R0, R1);
+ RPred = FCmpInst::getSwappedPredicate(RPred);
+ }
+ if (R0 == L1) {
+ std::swap(L0, L1);
+ LPred = FCmpInst::getSwappedPredicate(LPred);
+ }
+ if (L1 == R1) {
+ // If we have L0 == R0 and L1 == R1, then make L1/R1 the constants.
+ if (L0 != R0 || match(L0, m_ImmConstant())) {
+ std::swap(L0, L1);
+ LPred = FCmpInst::getSwappedPredicate(LPred);
+ std::swap(R0, R1);
+ RPred = FCmpInst::getSwappedPredicate(RPred);
+ }
+ }
+
+ // Can we infer anything when the two compares have matching operands?
+ if (L0 == R0 && L1 == R1) {
+ if ((LPred & RPred) == LPred)
+ return true;
+ if ((LPred & ~RPred) == LPred)
+ return false;
+ }
+
+ // See if we can infer anything if operand-0 matches and we have at least one
+ // constant.
+ const APFloat *L1C, *R1C;
+ if (L0 == R0 && match(L1, m_APFloat(L1C)) && match(R1, m_APFloat(R1C))) {
+ if (std::optional<ConstantFPRange> DomCR =
+ ConstantFPRange::makeExactFCmpRegion(LPred, *L1C)) {
+ if (std::optional<ConstantFPRange> ImpliedCR =
+ ConstantFPRange::makeExactFCmpRegion(RPred, *R1C)) {
+ if (ImpliedCR->contains(*DomCR))
+ return true;
+ }
+ if (std::optional<ConstantFPRange> ImpliedCR =
+ ConstantFPRange::makeExactFCmpRegion(
+ FCmpInst::getInversePredicate(RPred), *R1C)) {
+ if (ImpliedCR->contains(*DomCR))
+ return false;
+ }
+ }
+ }
+
+ return std::nullopt;
+}
+
/// Return true if LHS implies RHS is true. Return false if LHS implies RHS is
/// false. Otherwise, return std::nullopt if we can't infer anything. We
/// expect the RHS to be an icmp and the LHS to be an 'and', 'or', or a 'select'
@@ -9529,15 +9593,24 @@ llvm::isImpliedCondition(const Value *LHS, CmpPredicate RHSPred,
LHSIsTrue = !LHSIsTrue;
// Both LHS and RHS are icmps.
- if (const auto *LHSCmp = dyn_cast<ICmpInst>(LHS))
- return isImpliedCondICmps(LHSCmp->getCmpPredicate(), LHSCmp->getOperand(0),
- LHSCmp->getOperand(1), RHSPred, RHSOp0, RHSOp1,
- DL, LHSIsTrue);
- const Value *V;
- if (match(LHS, m_NUWTrunc(m_Value(V))))
- return isImpliedCondICmps(CmpInst::ICMP_NE, V,
- ConstantInt::get(V->getType(), 0), RHSPred,
- RHSOp0, RHSOp1, DL, LHSIsTrue);
+ if (RHSOp0->getType()->getScalarType()->isIntOrPtrTy()) {
+ if (const auto *LHSCmp = dyn_cast<ICmpInst>(LHS))
+ return isImpliedCondICmps(LHSCmp->getCmpPredicate(),
+ LHSCmp->getOperand(0), LHSCmp->getOperand(1),
+ RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue);
+ const Value *V;
+ if (match(LHS, m_NUWTrunc(m_Value(V))))
+ return isImpliedCondICmps(CmpInst::ICMP_NE, V,
+ ConstantInt::get(V->getType(), 0), RHSPred,
+ RHSOp0, RHSOp1, DL, LHSIsTrue);
+ } else {
+ assert(RHSOp0->getType()->isFPOrFPVectorTy() &&
+ "Expected floating point type only!");
+ if (const auto *LHSCmp = dyn_cast<FCmpInst>(LHS))
+ return isImpliedCondFCmps(LHSCmp->getPredicate(), LHSCmp->getOperand(0),
+ LHSCmp->getOperand(1), RHSPred, RHSOp0, RHSOp1,
+ DL, LHSIsTrue);
+ }
/// The LHS should be an 'or', 'and', or a 'select' instruction. We expect
/// the RHS to be an icmp.
@@ -9574,6 +9647,13 @@ std::optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS,
return InvertRHS ? !*Implied : *Implied;
return std::nullopt;
}
+ if (const FCmpInst *RHSCmp = dyn_cast<FCmpInst>(RHS)) {
+ if (auto Implied = isImpliedCondition(
+ LHS, RHSCmp->getPredicate(), RHSCmp->getOperand(0),
+ RHSCmp->getOperand(1), DL, LHSIsTrue, Depth))
+ return InvertRHS ? !*Implied : *Implied;
+ return std::nullopt;
+ }
const Value *V;
if (match(RHS, m_NUWTrunc(m_Value(V)))) {
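Note: the fcmp implication logic added above reduces to two checks. With identical operands, fcmp predicates act as bit masks (unordered/greater/less/equal), so LPred implies RPred exactly when LPred's bits are a subset of RPred's, e.g. (FCMP_OLT & FCMP_OLE) == FCMP_OLT. With a shared operand 0 and two constant right-hand sides, it becomes a ConstantFPRange containment test. A minimal sketch of the containment case, using only the APIs the patch itself relies on (the wrapper function and sample constants are invented for illustration):

    #include "llvm/ADT/APFloat.h"
    #include "llvm/IR/ConstantFPRange.h"
    #include "llvm/IR/InstrTypes.h"
    #include <optional>
    using namespace llvm;

    // Does a dominating "x olt 1.0" imply "x olt 2.0"?  This mirrors the
    // DomCR/ImpliedCR containment check in isImpliedCondFCmps and is expected
    // to return true: every float below 1.0 is also below 2.0.
    static bool oltOneImpliesOltTwo() {
      APFloat One(1.0f), Two(2.0f);
      std::optional<ConstantFPRange> DomCR =
          ConstantFPRange::makeExactFCmpRegion(FCmpInst::FCMP_OLT, One);
      std::optional<ConstantFPRange> ImpliedCR =
          ConstantFPRange::makeExactFCmpRegion(FCmpInst::FCMP_OLT, Two);
      return DomCR && ImpliedCR && ImpliedCR->contains(*DomCR);
    }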
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 11efe49..10df9c1 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2866,9 +2866,11 @@ bool AsmPrinter::doFinalization(Module &M) {
// If we don't have any trampolines, then we don't require stack memory
// to be executable. Some targets have a directive to declare this.
Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline");
- if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty())
- if (MCSection *S = MAI->getNonexecutableStackSection(OutContext))
- OutStreamer->switchSection(S);
+ bool HasTrampolineUses =
+ InitTrampolineIntrinsic && !InitTrampolineIntrinsic->use_empty();
+ MCSection *S = MAI->getStackSection(OutContext, /*Exec=*/HasTrampolineUses);
+ if (S)
+ OutStreamer->switchSection(S);
if (TM.Options.EmitAddrsig) {
// Emit address-significance attributes for all globals.
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 2f3b7a2..3c41bbe 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -1657,12 +1657,6 @@ bool TargetInstrInfo::isReMaterializableImpl(
// same virtual register, though.
if (MO.isDef() && Reg != DefReg)
return false;
-
- // Don't allow any virtual-register uses. Rematting an instruction with
- // virtual register uses would length the live ranges of the uses, which
- // is not necessarily a good idea, certainly not "trivial".
- if (MO.isUse())
- return false;
}
// Everything checked out.
diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp
index 8b5965b..df0c85b 100644
--- a/llvm/lib/IR/Core.cpp
+++ b/llvm/lib/IR/Core.cpp
@@ -2994,6 +2994,8 @@ LLVMValueRef LLVMIsATerminatorInst(LLVMValueRef Inst) {
LLVMDbgRecordRef LLVMGetFirstDbgRecord(LLVMValueRef Inst) {
Instruction *Instr = unwrap<Instruction>(Inst);
+ if (!Instr->DebugMarker)
+ return nullptr;
auto I = Instr->DebugMarker->StoredDbgRecords.begin();
if (I == Instr->DebugMarker->StoredDbgRecords.end())
return nullptr;
@@ -3002,6 +3004,8 @@ LLVMDbgRecordRef LLVMGetFirstDbgRecord(LLVMValueRef Inst) {
LLVMDbgRecordRef LLVMGetLastDbgRecord(LLVMValueRef Inst) {
Instruction *Instr = unwrap<Instruction>(Inst);
+ if (!Instr->DebugMarker)
+ return nullptr;
auto I = Instr->DebugMarker->StoredDbgRecords.rbegin();
if (I == Instr->DebugMarker->StoredDbgRecords.rend())
return nullptr;
diff --git a/llvm/lib/MC/MCAsmInfoELF.cpp b/llvm/lib/MC/MCAsmInfoELF.cpp
index cdae9d7..98090d3 100644
--- a/llvm/lib/MC/MCAsmInfoELF.cpp
+++ b/llvm/lib/MC/MCAsmInfoELF.cpp
@@ -27,12 +27,13 @@ using namespace llvm;
void MCAsmInfoELF::anchor() {}
-MCSection *MCAsmInfoELF::getNonexecutableStackSection(MCContext &Ctx) const {
+MCSection *MCAsmInfoELF::getStackSection(MCContext &Ctx, bool Exec) const {
// Solaris doesn't know/doesn't care about .note.GNU-stack sections, so
// don't emit them.
if (Ctx.getTargetTriple().isOSSolaris())
return nullptr;
- return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS, 0);
+ return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS,
+ Exec ? ELF::SHF_EXECINSTR : 0U);
}
bool MCAsmInfoELF::useCodeAlign(const MCSection &Sec) const {
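Note: callers migrate from getNonexecutableStackSection(Ctx) to getStackSection(Ctx, /*Exec=*/false); passing true requests the same .note.GNU-stack section with SHF_EXECINSTR set, which is what AsmPrinter now does when llvm.init.trampoline has uses. A hedged usage sketch (the helper and its parameters are placeholders, not part of the patch):

    #include "llvm/MC/MCAsmInfo.h"
    #include "llvm/MC/MCContext.h"
    #include "llvm/MC/MCSection.h"
    #include "llvm/MC/MCStreamer.h"
    using namespace llvm;

    // Emit the stack-marker section via the new single entry point.  Exec=false
    // reproduces the old non-executable marker; Exec=true sets SHF_EXECINSTR on
    // ELF.  Targets may return nullptr (Solaris here, BPF unconditionally).
    static void emitStackNote(const MCAsmInfo &MAI, MCContext &Ctx,
                              MCStreamer &Streamer, bool NeedsExecStack) {
      if (MCSection *S = MAI.getStackSection(Ctx, /*Exec=*/NeedsExecStack))
        Streamer.switchSection(S);
    }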
diff --git a/llvm/lib/MC/MCELFStreamer.cpp b/llvm/lib/MC/MCELFStreamer.cpp
index 2881d7c..1bc1b92 100644
--- a/llvm/lib/MC/MCELFStreamer.cpp
+++ b/llvm/lib/MC/MCELFStreamer.cpp
@@ -54,7 +54,7 @@ void MCELFStreamer::initSections(bool NoExecStack, const MCSubtargetInfo &STI) {
&STI);
if (NoExecStack)
- switchSection(Ctx.getAsmInfo()->getNonexecutableStackSection(Ctx));
+ switchSection(Ctx.getAsmInfo()->getStackSection(Ctx, /*Exec=*/false));
}
void MCELFStreamer::emitLabel(MCSymbol *S, SMLoc Loc) {
diff --git a/llvm/lib/Target/ARM/ARMInstrCDE.td b/llvm/lib/Target/ARM/ARMInstrCDE.td
index 54e27a6..f4326de 100644
--- a/llvm/lib/Target/ARM/ARMInstrCDE.td
+++ b/llvm/lib/Target/ARM/ARMInstrCDE.td
@@ -268,6 +268,7 @@ class CDE_Vec_Instr<bit acc, dag oops, dag iops, string asm, string cstr,
!con(iops, (ins vpred:$vp)), asm,
!strconcat(cstr, vpred.vpred_constraint)>,
CDE_RequiresQReg {
+ bits<0> vp;
}
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 9dffd94..e244134 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -409,6 +409,7 @@ class MVE_p<dag oops, dag iops, InstrItinClass itin, string iname,
!strconcat(iname, "${vp}",
!if(!eq(suffix, ""), "", !strconcat(".", suffix))),
ops, !strconcat(cstr, vpred.vpred_constraint), vecsize, pattern> {
+ bits<0> vp;
let Inst{31-29} = 0b111;
let Inst{27-26} = 0b11;
}
diff --git a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index d358913..e67db8e 100644
--- a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -639,6 +639,43 @@ static DecodeStatus DecodeCCOutOperand(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeVpredNOperand(MCInst &Inst,
+ const MCDisassembler *Decoder) {
+ const auto *D = static_cast<const ARMDisassembler *>(Decoder);
+ unsigned VCC = D->VPTBlock.getVPTPred();
+ MCRegister CondReg = VCC == ARMVCC::None ? ARM::NoRegister : ARM::P0;
+
+ Inst.addOperand(MCOperand::createImm(VCC)); // $cond
+ Inst.addOperand(MCOperand::createReg(CondReg)); // $cond_reg
+ Inst.addOperand(MCOperand::createReg(ARM::NoRegister)); // $tp_reg
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeVpredROperand(MCInst &Inst,
+ const MCDisassembler *Decoder) {
+ const auto *D = static_cast<const ARMDisassembler *>(Decoder);
+ unsigned VCC = D->VPTBlock.getVPTPred();
+ MCRegister CondReg = VCC == ARMVCC::None ? ARM::NoRegister : ARM::P0;
+
+ Inst.addOperand(MCOperand::createImm(VCC)); // $cond
+ Inst.addOperand(MCOperand::createReg(CondReg)); // $cond_reg
+ Inst.addOperand(MCOperand::createReg(ARM::NoRegister)); // $tp_reg
+
+ // The last sub-operand ($inactive) is tied to an output operand.
+ // The output operand has already been decoded, so just copy it.
+ const MCInstrDesc &MCID = D->MCII->get(Inst.getOpcode());
+ unsigned InactiveOpIdx = Inst.getNumOperands();
+ int TiedOpIdx = MCID.getOperandConstraint(InactiveOpIdx, MCOI::TIED_TO);
+ assert(TiedOpIdx >= 0 &&
+ "Inactive register in vpred_r is not tied to an output!");
+
+ // Make a copy of the operand to ensure it is not invalidated when Inst grows.
+ Inst.addOperand(MCOperand(Inst.getOperand(TiedOpIdx))); // $inactive
+
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Val,
uint64_t Address,
const MCDisassembler *Decoder) {
@@ -2777,6 +2814,7 @@ static DecodeStatus DecodeMVEModImmInstruction(MCInst &Inst, unsigned Insn,
Inst.addOperand(MCOperand::createImm(imm));
+ Check(S, DecodeVpredROperand(Inst, Decoder));
return S;
}
@@ -2802,6 +2840,7 @@ static DecodeStatus DecodeMVEVADCInstruction(MCInst &Inst, unsigned Insn,
if (!fieldFromInstruction(Insn, 12, 1)) // I bit clear => need input FPSCR
Inst.addOperand(MCOperand::createReg(ARM::FPSCR_NZCV));
+ Check(S, DecodeVpredROperand(Inst, Decoder));
return S;
}
@@ -5466,30 +5505,6 @@ static DecodeStatus DecodeVPTMaskOperand(MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeVpredROperand(MCInst &Inst, unsigned RegNo,
- uint64_t Address,
- const MCDisassembler *Decoder) {
- // The vpred_r operand type includes an MQPR register field derived
- // from the encoding. But we don't actually want to add an operand
- // to the MCInst at this stage, because AddThumbPredicate will do it
- // later, and will infer the register number from the TIED_TO
- // constraint. So this is a deliberately empty decoder method that
- // will inhibit the auto-generated disassembly code from adding an
- // operand at all.
- return MCDisassembler::Success;
-}
-
-[[maybe_unused]] static DecodeStatus
-DecodeVpredNOperand(MCInst &Inst, unsigned RegNo, uint64_t Address,
- const MCDisassembler *Decoder) {
- // Similar to above, we want to ensure that no operands are added for the
- // vpred operands. (This is marked "maybe_unused" for the moment; because
- // DecoderEmitter currently (wrongly) omits operands with no instruction bits,
- // the decoder doesn't actually call it yet. That will be addressed in a
- // future change.)
- return MCDisassembler::Success;
-}
-
static DecodeStatus
DecodeRestrictedIPredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address,
const MCDisassembler *Decoder) {
@@ -5668,6 +5683,7 @@ DecodeMVE_MEM_pre(MCInst &Inst, unsigned Val, uint64_t Address,
if (!Check(S, AddrDecoder(Inst, addr, Address, Decoder)))
return MCDisassembler::Fail;
+ Check(S, DecodeVpredNOperand(Inst, Decoder));
return S;
}
@@ -5871,7 +5887,7 @@ static DecodeStatus DecodeMVEVCVTt1fp(MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
if (!Check(S, DecodeVCVTImmOperand(Inst, imm6, Address, Decoder)))
return MCDisassembler::Fail;
-
+ Check(S, DecodeVpredROperand(Inst, Decoder));
return S;
}
@@ -5906,6 +5922,7 @@ static DecodeStatus DecodeMVEVCMP(MCInst &Inst, unsigned Insn, uint64_t Address,
if (!Check(S, predicate_decoder(Inst, fc, Address, Decoder)))
return MCDisassembler::Fail;
+ Check(S, DecodeVpredNOperand(Inst, Decoder));
return S;
}
@@ -5916,6 +5933,7 @@ static DecodeStatus DecodeMveVCTP(MCInst &Inst, unsigned Insn, uint64_t Address,
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
if (!Check(S, DecoderGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
+ Check(S, DecodeVpredNOperand(Inst, Decoder));
return S;
}
@@ -5925,6 +5943,7 @@ static DecodeStatus DecodeMVEVPNOT(MCInst &Inst, unsigned Insn,
DecodeStatus S = MCDisassembler::Success;
Inst.addOperand(MCOperand::createReg(ARM::VPR));
Inst.addOperand(MCOperand::createReg(ARM::VPR));
+ Check(S, DecodeVpredNOperand(Inst, Decoder));
return S;
}
@@ -6199,15 +6218,13 @@ ARMDisassembler::AddThumbPredicate(MCInst &MI) const {
(isVectorPredicable(MI) && ITBlock.instrInITBlock()))
S = SoftFail;
- // If we're in an IT/VPT block, base the predicate on that. Otherwise,
+ // If we're in an IT block, base the predicate on that. Otherwise,
// assume a predicate of AL.
unsigned CC = ARMCC::AL;
- unsigned VCC = ARMVCC::None;
if (ITBlock.instrInITBlock()) {
CC = ITBlock.getITCC();
ITBlock.advanceITState();
} else if (VPTBlock.instrInVPTBlock()) {
- VCC = VPTBlock.getVPTPred();
VPTBlock.advanceVPTState();
}
@@ -6230,34 +6247,6 @@ ARMDisassembler::AddThumbPredicate(MCInst &MI) const {
Check(S, SoftFail);
}
- MCInst::iterator VCCI = MI.begin();
- unsigned VCCPos;
- for (VCCPos = 0; VCCPos < MCID.NumOperands; ++VCCPos, ++VCCI) {
- if (ARM::isVpred(MCID.operands()[VCCPos].OperandType) || VCCI == MI.end())
- break;
- }
-
- if (isVectorPredicable(MI)) {
- VCCI = MI.insert(VCCI, MCOperand::createImm(VCC));
- ++VCCI;
- if (VCC == ARMVCC::None)
- VCCI = MI.insert(VCCI, MCOperand::createReg(0));
- else
- VCCI = MI.insert(VCCI, MCOperand::createReg(ARM::P0));
- ++VCCI;
- VCCI = MI.insert(VCCI, MCOperand::createReg(0));
- ++VCCI;
- if (MCID.operands()[VCCPos].OperandType == ARM::OPERAND_VPRED_R) {
- int TiedOp = MCID.getOperandConstraint(VCCPos + 3, MCOI::TIED_TO);
- assert(TiedOp >= 0 &&
- "Inactive register in vpred_r is not tied to an output!");
- // Copy the operand to ensure it's not invalidated when MI grows.
- MI.insert(VCCI, MCOperand(MI.getOperand(TiedOp)));
- }
- } else if (VCC != ARMVCC::None) {
- Check(S, SoftFail);
- }
-
return S;
}
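Note: with the post-hoc insertion removed from AddThumbPredicate, the vpred sub-operands are appended by the decoder callbacks themselves. A rough sketch of the common tail they produce, factored into a hypothetical helper (illustrative only; it assumes the ARM target-internal headers already included by this file):

    // VCC comes from the disassembler's VPT block state and is ARMVCC::None
    // outside a VPT block, in which case no predicate register is recorded.
    static void appendVpredTail(MCInst &Inst, unsigned VCC) {
      MCRegister CondReg = VCC == ARMVCC::None ? ARM::NoRegister : ARM::P0;
      Inst.addOperand(MCOperand::createImm(VCC));              // $cond
      Inst.addOperand(MCOperand::createReg(CondReg));          // $cond_reg
      Inst.addOperand(MCOperand::createReg(ARM::NoRegister));  // $tp_reg
      // DecodeVpredROperand then appends one more sub-operand: a copy of the
      // tied output register ($inactive), looked up via MCOI::TIED_TO.
    }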
diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
index dfd896f..8d8066a 100644
--- a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
+++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
@@ -49,7 +49,7 @@ public:
DwarfUsesRelocationsAcrossSections = enable;
}
- MCSection *getNonexecutableStackSection(MCContext &Ctx) const override {
+ MCSection *getStackSection(MCContext &Ctx, bool Exec) const override {
return nullptr;
}
};
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 3df448d..8f60e50 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -17,6 +17,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CmpInstAnalysis.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/OverflowInstAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
@@ -42,6 +43,7 @@
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <cassert>
+#include <optional>
#include <utility>
#define DEBUG_TYPE "instcombine"
@@ -1451,10 +1453,16 @@ Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel,
return nullptr;
};
- if (Instruction *R = ReplaceOldOpWithNewOp(CmpLHS, CmpRHS))
- return R;
- if (Instruction *R = ReplaceOldOpWithNewOp(CmpRHS, CmpLHS))
- return R;
+ bool CanReplaceCmpLHSWithRHS = canReplacePointersIfEqual(CmpLHS, CmpRHS, DL);
+ if (CanReplaceCmpLHSWithRHS) {
+ if (Instruction *R = ReplaceOldOpWithNewOp(CmpLHS, CmpRHS))
+ return R;
+ }
+ bool CanReplaceCmpRHSWithLHS = canReplacePointersIfEqual(CmpRHS, CmpLHS, DL);
+ if (CanReplaceCmpRHSWithLHS) {
+ if (Instruction *R = ReplaceOldOpWithNewOp(CmpRHS, CmpLHS))
+ return R;
+ }
auto *FalseInst = dyn_cast<Instruction>(FalseVal);
if (!FalseInst)
@@ -1469,12 +1477,14 @@ Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel,
// Example:
// (X == 42) ? 43 : (X + 1) --> (X == 42) ? (X + 1) : (X + 1) --> X + 1
SmallVector<Instruction *> DropFlags;
- if (simplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, SQ,
- /* AllowRefinement */ false,
- &DropFlags) == TrueVal ||
- simplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, SQ,
- /* AllowRefinement */ false,
- &DropFlags) == TrueVal) {
+ if ((CanReplaceCmpLHSWithRHS &&
+ simplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, SQ,
+ /* AllowRefinement */ false,
+ &DropFlags) == TrueVal) ||
+ (CanReplaceCmpRHSWithLHS &&
+ simplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, SQ,
+ /* AllowRefinement */ false,
+ &DropFlags) == TrueVal)) {
for (Instruction *I : DropFlags) {
I->dropPoisonGeneratingAnnotations();
Worklist.add(I);
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index cb6bfb2..56a3d6d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3903,8 +3903,7 @@ void LoopVectorizationPlanner::emitInvalidCostRemarks(
if (VF.isScalar())
continue;
- VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind,
- *CM.PSE.getSE());
+ VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind);
precomputeCosts(*Plan, VF, CostCtx);
auto Iter = vp_depth_first_deep(Plan->getVectorLoopRegion()->getEntry());
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
@@ -4161,8 +4160,7 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
// Add on other costs that are modelled in VPlan, but not in the legacy
// cost model.
- VPCostContext CostCtx(CM.TTI, *CM.TLI, *P, CM, CM.CostKind,
- *CM.PSE.getSE());
+ VPCostContext CostCtx(CM.TTI, *CM.TLI, *P, CM, CM.CostKind);
VPRegionBlock *VectorRegion = P->getVectorLoopRegion();
assert(VectorRegion && "Expected to have a vector region!");
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
@@ -6854,7 +6852,7 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan,
ElementCount VF) const {
- VPCostContext CostCtx(CM.TTI, *CM.TLI, Plan, CM, CM.CostKind, *PSE.getSE());
+ VPCostContext CostCtx(CM.TTI, *CM.TLI, Plan, CM, CM.CostKind);
InstructionCost Cost = precomputeCosts(Plan, VF, CostCtx);
// Now compute and add the VPlan-based cost.
@@ -7087,8 +7085,7 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
// simplifications not accounted for in the legacy cost model. If that's the
// case, don't trigger the assertion, as the extra simplifications may cause a
// different VF to be picked by the VPlan-based cost model.
- VPCostContext CostCtx(CM.TTI, *CM.TLI, BestPlan, CM, CM.CostKind,
- *CM.PSE.getSE());
+ VPCostContext CostCtx(CM.TTI, *CM.TLI, BestPlan, CM, CM.CostKind);
precomputeCosts(BestPlan, BestFactor.Width, CostCtx);
// Verify that the VPlan-based and legacy cost models agree, except for VPlans
// with early exits and plans with additional VPlan simplifications. The
@@ -8624,8 +8621,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// TODO: Enable following transform when the EVL-version of extended-reduction
// and mulacc-reduction are implemented.
if (!CM.foldTailWithEVL()) {
- VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind,
- *CM.PSE.getSE());
+ VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind);
VPlanTransforms::runPass(VPlanTransforms::convertToAbstractRecipes, *Plan,
CostCtx, Range);
}
@@ -10079,7 +10075,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
bool ForceVectorization =
Hints.getForce() == LoopVectorizeHints::FK_Enabled;
VPCostContext CostCtx(CM.TTI, *CM.TLI, LVP.getPlanFor(VF.Width), CM,
- CM.CostKind, *CM.PSE.getSE());
+ CM.CostKind);
if (!ForceVectorization &&
!isOutsideLoopWorkProfitable(Checks, VF, L, PSE, CostCtx,
LVP.getPlanFor(VF.Width), SEL,
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 2555ebe..07b191a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -1772,8 +1772,7 @@ VPCostContext::getOperandInfo(VPValue *V) const {
}
InstructionCost VPCostContext::getScalarizationOverhead(
- Type *ResultTy, ArrayRef<const VPValue *> Operands, ElementCount VF,
- bool AlwaysIncludeReplicatingR) {
+ Type *ResultTy, ArrayRef<const VPValue *> Operands, ElementCount VF) {
if (VF.isScalar())
return 0;
@@ -1793,11 +1792,7 @@ InstructionCost VPCostContext::getScalarizationOverhead(
SmallPtrSet<const VPValue *, 4> UniqueOperands;
SmallVector<Type *> Tys;
for (auto *Op : Operands) {
- if (Op->isLiveIn() ||
- (!AlwaysIncludeReplicatingR &&
- isa<VPReplicateRecipe, VPPredInstPHIRecipe>(Op)) ||
- (isa<VPReplicateRecipe>(Op) &&
- cast<VPReplicateRecipe>(Op)->getOpcode() == Instruction::Load) ||
+ if (Op->isLiveIn() || isa<VPReplicateRecipe, VPPredInstPHIRecipe>(Op) ||
!UniqueOperands.insert(Op).second)
continue;
Tys.push_back(toVectorizedTy(Types.inferScalarType(Op), VF));
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
index 1580a3b..fc1a09e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
@@ -349,14 +349,12 @@ struct VPCostContext {
LoopVectorizationCostModel &CM;
SmallPtrSet<Instruction *, 8> SkipCostComputation;
TargetTransformInfo::TargetCostKind CostKind;
- ScalarEvolution &SE;
VPCostContext(const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI,
const VPlan &Plan, LoopVectorizationCostModel &CM,
- TargetTransformInfo::TargetCostKind CostKind,
- ScalarEvolution &SE)
+ TargetTransformInfo::TargetCostKind CostKind)
: TTI(TTI), TLI(TLI), Types(Plan), LLVMCtx(Plan.getContext()), CM(CM),
- CostKind(CostKind), SE(SE) {}
+ CostKind(CostKind) {}
/// Return the cost for \p UI with \p VF using the legacy cost model as
/// fallback until computing the cost of all recipes migrates to VPlan.
@@ -376,12 +374,10 @@ struct VPCostContext {
/// Estimate the overhead of scalarizing a recipe with result type \p ResultTy
/// and \p Operands with \p VF. This is a convenience wrapper for the
- /// type-based getScalarizationOverhead API. If \p AlwaysIncludeReplicatingR
- /// is true, always compute the cost of scalarizing replicating operands.
- InstructionCost
- getScalarizationOverhead(Type *ResultTy, ArrayRef<const VPValue *> Operands,
- ElementCount VF,
- bool AlwaysIncludeReplicatingR = false);
+ /// type-based getScalarizationOverhead API.
+ InstructionCost getScalarizationOverhead(Type *ResultTy,
+ ArrayRef<const VPValue *> Operands,
+ ElementCount VF);
};
/// This class can be used to assign names to VPValues. For VPValues without
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index a88cffc..67b9244 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -40,7 +40,6 @@
#include <cassert>
using namespace llvm;
-using namespace llvm::VPlanPatternMatch;
using VectorParts = SmallVector<Value *, 2>;
@@ -304,6 +303,7 @@ VPPartialReductionRecipe::computeCost(ElementCount VF,
VPRecipeBase *OpR = Op->getDefiningRecipe();
// If the partial reduction is predicated, a select will be operand 0
+ using namespace llvm::VPlanPatternMatch;
if (match(getOperand(1), m_Select(m_VPValue(), m_VPValue(Op), m_VPValue()))) {
OpR = Op->getDefiningRecipe();
}
@@ -1963,6 +1963,7 @@ InstructionCost VPWidenSelectRecipe::computeCost(ElementCount VF,
Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
VPValue *Op0, *Op1;
+ using namespace llvm::VPlanPatternMatch;
if (!ScalarCond && ScalarTy->getScalarSizeInBits() == 1 &&
(match(this, m_LogicalAnd(m_VPValue(Op0), m_VPValue(Op1))) ||
match(this, m_LogicalOr(m_VPValue(Op0), m_VPValue(Op1))))) {
@@ -3110,62 +3111,6 @@ bool VPReplicateRecipe::shouldPack() const {
});
}
-/// Returns true if \p Ptr is a pointer computation for which the legacy cost
-/// model computes a SCEV expression when computing the address cost.
-static bool shouldUseAddressAccessSCEV(const VPValue *Ptr) {
- auto *PtrR = Ptr->getDefiningRecipe();
- if (!PtrR || !((isa<VPReplicateRecipe>(PtrR) &&
- cast<VPReplicateRecipe>(PtrR)->getOpcode() ==
- Instruction::GetElementPtr) ||
- isa<VPWidenGEPRecipe>(PtrR) ||
- match(Ptr, m_GetElementPtr(m_VPValue(), m_VPValue()))))
- return false;
-
- // We are looking for a GEP where all indices are either loop invariant or
- // inductions.
- for (VPValue *Opd : drop_begin(PtrR->operands())) {
- if (!Opd->isDefinedOutsideLoopRegions() &&
- !isa<VPScalarIVStepsRecipe, VPWidenIntOrFpInductionRecipe>(Opd))
- return false;
- }
-
- return true;
-}
-
-/// Returns true if \p V is used as part of the address of another load or
-/// store.
-static bool isUsedByLoadStoreAddress(const VPUser *V) {
- SmallPtrSet<const VPUser *, 4> Seen;
- SmallVector<const VPUser *> WorkList = {V};
-
- while (!WorkList.empty()) {
- auto *Cur = dyn_cast<VPSingleDefRecipe>(WorkList.pop_back_val());
- if (!Cur || !Seen.insert(Cur).second)
- continue;
-
- for (VPUser *U : Cur->users()) {
- if (auto *InterleaveR = dyn_cast<VPInterleaveBase>(U))
- if (InterleaveR->getAddr() == Cur)
- return true;
- if (auto *RepR = dyn_cast<VPReplicateRecipe>(U)) {
- if (RepR->getOpcode() == Instruction::Load &&
- RepR->getOperand(0) == Cur)
- return true;
- if (RepR->getOpcode() == Instruction::Store &&
- RepR->getOperand(1) == Cur)
- return true;
- }
- if (auto *MemR = dyn_cast<VPWidenMemoryRecipe>(U)) {
- if (MemR->getAddr() == Cur && MemR->isConsecutive())
- return true;
- }
- }
-
- append_range(WorkList, cast<VPSingleDefRecipe>(Cur)->users());
- }
- return false;
-}
-
InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
VPCostContext &Ctx) const {
Instruction *UI = cast<Instruction>(getUnderlyingValue());
@@ -3273,59 +3218,21 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
}
case Instruction::Load:
case Instruction::Store: {
- if (VF.isScalable() && !isSingleScalar())
- return InstructionCost::getInvalid();
-
+ if (isSingleScalar()) {
+ bool IsLoad = UI->getOpcode() == Instruction::Load;
+ Type *ValTy = Ctx.Types.inferScalarType(IsLoad ? this : getOperand(0));
+ Type *ScalarPtrTy = Ctx.Types.inferScalarType(getOperand(IsLoad ? 0 : 1));
+ const Align Alignment = getLoadStoreAlignment(UI);
+ unsigned AS = getLoadStoreAddressSpace(UI);
+ TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(UI->getOperand(0));
+ InstructionCost ScalarMemOpCost = Ctx.TTI.getMemoryOpCost(
+ UI->getOpcode(), ValTy, Alignment, AS, Ctx.CostKind, OpInfo, UI);
+ return ScalarMemOpCost + Ctx.TTI.getAddressComputationCost(
+ ScalarPtrTy, nullptr, nullptr, Ctx.CostKind);
+ }
// TODO: See getMemInstScalarizationCost for how to handle replicating and
// predicated cases.
- const VPRegionBlock *ParentRegion = getParent()->getParent();
- if (ParentRegion && ParentRegion->isReplicator())
- break;
-
- bool IsLoad = UI->getOpcode() == Instruction::Load;
- const VPValue *PtrOp = getOperand(!IsLoad);
- // TODO: Handle cases where we need to pass a SCEV to
- // getAddressComputationCost.
- if (shouldUseAddressAccessSCEV(PtrOp))
- break;
-
- Type *ValTy = Ctx.Types.inferScalarType(IsLoad ? this : getOperand(0));
- Type *ScalarPtrTy = Ctx.Types.inferScalarType(PtrOp);
- const Align Alignment = getLoadStoreAlignment(UI);
- unsigned AS = getLoadStoreAddressSpace(UI);
- TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(UI->getOperand(0));
- InstructionCost ScalarMemOpCost = Ctx.TTI.getMemoryOpCost(
- UI->getOpcode(), ValTy, Alignment, AS, Ctx.CostKind, OpInfo);
-
- Type *PtrTy = isSingleScalar() ? ScalarPtrTy : toVectorTy(ScalarPtrTy, VF);
- bool UsedByLoadStoreAddress = isUsedByLoadStoreAddress(this);
- InstructionCost ScalarCost =
- ScalarMemOpCost + Ctx.TTI.getAddressComputationCost(
- PtrTy, UsedByLoadStoreAddress ? nullptr : &Ctx.SE,
- nullptr, Ctx.CostKind);
- if (isSingleScalar())
- return ScalarCost;
-
- SmallVector<const VPValue *> OpsToScalarize;
- Type *ResultTy = Type::getVoidTy(PtrTy->getContext());
- // Set ResultTy and OpsToScalarize, if scalarization is needed. Currently we
- // don't assign scalarization overhead in general, if the target prefers
- // vectorized addressing or the loaded value is used as part of an address
- // of another load or store.
- bool PreferVectorizedAddressing = Ctx.TTI.prefersVectorizedAddressing();
- if (PreferVectorizedAddressing || !UsedByLoadStoreAddress) {
- bool EfficientVectorLoadStore =
- Ctx.TTI.supportsEfficientVectorElementLoadStore();
- if (!(IsLoad && !PreferVectorizedAddressing) &&
- !(!IsLoad && EfficientVectorLoadStore))
- append_range(OpsToScalarize, operands());
-
- if (!EfficientVectorLoadStore)
- ResultTy = Ctx.Types.inferScalarType(this);
- }
-
- return (ScalarCost * VF.getFixedValue()) +
- Ctx.getScalarizationOverhead(ResultTy, OpsToScalarize, VF, true);
+ break;
}
}