Diffstat (limited to 'llvm/lib')
62 files changed, 828 insertions, 367 deletions
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp index 11d8294..e45d1f7 100644 --- a/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -1587,6 +1587,15 @@ static const SCEV *minusSCEVNoSignedOverflow(const SCEV *A, const SCEV *B, return nullptr; } +/// Returns \p A * \p B if it is guaranteed not to signed wrap. Otherwise returns +/// nullptr. \p A and \p B must have the same integer type. +static const SCEV *mulSCEVNoSignedOverflow(const SCEV *A, const SCEV *B, + ScalarEvolution &SE) { + if (SE.willNotOverflow(Instruction::Mul, /*Signed=*/true, A, B)) + return SE.getMulExpr(A, B); + return nullptr; +} + /// Returns the absolute value of \p A. In the context of dependence analysis, /// we need an absolute value in a mathematical sense. If \p A is the signed /// minimum value, we cannot represent it unless extending the original type. @@ -1686,7 +1695,11 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst, assert(0 < Level && Level <= CommonLevels && "level out of range"); Level--; - const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); + const SCEV *Delta = minusSCEVNoSignedOverflow(SrcConst, DstConst, *SE); + if (!Delta) { + Result.Consistent = false; + return false; + } LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta); LLVM_DEBUG(dbgs() << ", " << *Delta->getType() << "\n"); @@ -1702,7 +1715,9 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst, const SCEV *AbsCoeff = absSCEVNoSignedOverflow(Coeff, *SE); if (!AbsDelta || !AbsCoeff) return false; - const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff); + const SCEV *Product = mulSCEVNoSignedOverflow(UpperBound, AbsCoeff, *SE); + if (!Product) + return false; return isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product); }(); if (IsDeltaLarge) { diff --git a/llvm/lib/Analysis/RegionPrinter.cpp b/llvm/lib/Analysis/RegionPrinter.cpp index a83af4e..33e073b 100644 --- a/llvm/lib/Analysis/RegionPrinter.cpp +++ b/llvm/lib/Analysis/RegionPrinter.cpp @@ -29,10 +29,9 @@ onlySimpleRegions("only-simple-regions", cl::Hidden, cl::init(false)); -namespace llvm { - -std::string DOTGraphTraits<RegionNode *>::getNodeLabel(RegionNode *Node, - RegionNode *Graph) { +std::string +llvm::DOTGraphTraits<RegionNode *>::getNodeLabel(RegionNode *Node, + RegionNode *Graph) { if (!Node->isSubRegion()) { BasicBlock *BB = Node->getNodeAs<BasicBlock>(); @@ -46,7 +45,8 @@ std::string DOTGraphTraits<RegionNode *>::getNodeLabel(RegionNode *Node, } template <> -struct DOTGraphTraits<RegionInfo *> : public DOTGraphTraits<RegionNode *> { +struct llvm::DOTGraphTraits<RegionInfo *> + : public llvm::DOTGraphTraits<RegionNode *> { DOTGraphTraits (bool isSimple = false) : DOTGraphTraits<RegionNode*>(isSimple) {} @@ -125,7 +125,6 @@ struct DOTGraphTraits<RegionInfo *> : public DOTGraphTraits<RegionNode *> { printRegionCluster(*G->getTopLevelRegion(), GW, 4); } }; -} // end namespace llvm namespace { diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 713277d..3aa245b 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -2087,6 +2087,17 @@ void AsmPrinter::emitFunctionBody() { // This is only used to influence register allocation behavior, no // actual initialization is needed. break; + case TargetOpcode::RELOC_NONE: { + // Generate a temporary label for the current PC.
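Aside (not part of the patch): the RELOC_NONE case above is the AsmPrinter end of a pipeline that starts at the IR level with a call such as call void @llvm.reloc.none(metadata !"sym"), introduced later in this change. A minimal front-end sketch of emitting that call, assuming the Intrinsic::reloc_none ID this patch adds (emitRelocNone is a hypothetical helper):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

// Emits `call void @llvm.reloc.none(metadata !"Sym")` at the builder's
// insertion point; the target symbol travels as an MDString operand.
static void emitRelocNone(llvm::Module &M, llvm::IRBuilder<> &B,
                          llvm::StringRef Sym) {
  llvm::MDString *Name = llvm::MDString::get(M.getContext(), Sym);
  B.CreateIntrinsic(llvm::Intrinsic::reloc_none, /*Types=*/{},
                    {llvm::MetadataAsValue::get(M.getContext(), Name)});
}

The emitRelocDirective call in the code that follows should then produce a `.reloc <label>, BFD_RELOC_NONE, sym` directive, which records a dependence on sym for the linker without emitting any actual fixup.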
+ MCSymbol *Sym = OutContext.createTempSymbol("reloc_none"); + OutStreamer->emitLabel(Sym); + const MCExpr *Dot = MCSymbolRefExpr::create(Sym, OutContext); + const MCExpr *Value = MCSymbolRefExpr::create( + OutContext.getOrCreateSymbol(MI.getOperand(0).getSymbolName()), + OutContext); + OutStreamer->emitRelocDirective(*Dot, "BFD_RELOC_NONE", Value, SMLoc()); + break; + } default: emitInstruction(&MI); diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index b3c3125..7be7468 100644 --- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -292,7 +292,8 @@ void CallLowering::splitToValueTypes(const ArgInfo &OrigArg, LLVMContext &Ctx = OrigArg.Ty->getContext(); SmallVector<EVT, 4> SplitVTs; - ComputeValueVTs(*TLI, DL, OrigArg.Ty, SplitVTs, Offsets, 0); + ComputeValueVTs(*TLI, DL, OrigArg.Ty, SplitVTs, /*MemVTs=*/nullptr, Offsets, + 0); if (SplitVTs.size() == 0) return; @@ -996,7 +997,7 @@ void CallLowering::insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy, SmallVector<EVT, 4> SplitVTs; SmallVector<uint64_t, 4> Offsets; - ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets, 0); + ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, /*MemVTs=*/nullptr, &Offsets, 0); assert(VRegs.size() == SplitVTs.size()); @@ -1028,7 +1029,7 @@ void CallLowering::insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy, SmallVector<EVT, 4> SplitVTs; SmallVector<uint64_t, 4> Offsets; - ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets, 0); + ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, /*MemVTs=*/nullptr, &Offsets, 0); assert(VRegs.size() == SplitVTs.size()); diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 4fd2204..4f6a19f 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -2686,6 +2686,13 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, case Intrinsic::experimental_convergence_entry: case Intrinsic::experimental_convergence_loop: return translateConvergenceControlIntrinsic(CI, ID, MIRBuilder); + case Intrinsic::reloc_none: { + Metadata *MD = cast<MetadataAsValue>(CI.getArgOperand(0))->getMetadata(); + StringRef SymbolName = cast<MDString>(MD)->getString(); + MIRBuilder.buildInstr(TargetOpcode::RELOC_NONE) + .addExternalSymbol(SymbolName.data()); + return true; + } } return false; } @@ -2821,20 +2828,34 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { if (translateKnownIntrinsic(CI, ID, MIRBuilder)) return true; + TargetLowering::IntrinsicInfo Info; + bool IsTgtMemIntrinsic = TLI->getTgtMemIntrinsic(Info, CI, *MF, ID); + + return translateIntrinsic(CI, ID, MIRBuilder, + IsTgtMemIntrinsic ? &Info : nullptr); +} + +/// Translate a call to an intrinsic. +/// Depending on whether TLI->getTgtMemIntrinsic() is true, TgtMemIntrinsicInfo +/// is a pointer to the correspondingly populated IntrinsicInfo object. +/// Otherwise, this pointer is null. +bool IRTranslator::translateIntrinsic( + const CallBase &CB, Intrinsic::ID ID, MachineIRBuilder &MIRBuilder, + const TargetLowering::IntrinsicInfo *TgtMemIntrinsicInfo) { ArrayRef<Register> ResultRegs; - if (!CI.getType()->isVoidTy()) - ResultRegs = getOrCreateVRegs(CI); + if (!CB.getType()->isVoidTy()) + ResultRegs = getOrCreateVRegs(CB); // Ignore the callsite attributes. Backend code is most likely not expecting // an intrinsic to sometimes have side effects and sometimes not. 
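The ComputeValueVTs updates in this file (and the matching ones in SelectionDAGBuilder, AMDGPU, and NVPTX below) are mechanical: the call sites now spell out the MemVTs pointer explicitly rather than relying on a convenience overload. A sketch of the signature they resolve to, assuming the declaration in llvm/include/llvm/CodeGen/Analysis.h (parameter spellings approximate):

#include "llvm/CodeGen/Analysis.h"

// void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
//                      Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
//                      SmallVectorImpl<EVT> *MemVTs,   // may be nullptr
//                      SmallVectorImpl<uint64_t> *FixedOffsets,
//                      uint64_t StartingOffset);

Passing /*MemVTs=*/nullptr keeps the old behavior: only register value types are computed.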
MachineInstrBuilder MIB = MIRBuilder.buildIntrinsic(ID, ResultRegs); - if (isa<FPMathOperator>(CI)) - MIB->copyIRFlags(CI); + if (isa<FPMathOperator>(CB)) + MIB->copyIRFlags(CB); - for (const auto &Arg : enumerate(CI.args())) { + for (const auto &Arg : enumerate(CB.args())) { // If this is required to be an immediate, don't materialize it in a // register. - if (CI.paramHasAttr(Arg.index(), Attribute::ImmArg)) { + if (CB.paramHasAttr(Arg.index(), Attribute::ImmArg)) { if (ConstantInt *CI = dyn_cast<ConstantInt>(Arg.value())) { // imm arguments are more convenient than cimm (and realistically // probably sufficient), so use them. @@ -2863,29 +2884,33 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { } // Add a MachineMemOperand if it is a target mem intrinsic. - TargetLowering::IntrinsicInfo Info; - // TODO: Add a GlobalISel version of getTgtMemIntrinsic. - if (TLI->getTgtMemIntrinsic(Info, CI, *MF, ID)) { - Align Alignment = Info.align.value_or( - DL->getABITypeAlign(Info.memVT.getTypeForEVT(F->getContext()))); - LLT MemTy = Info.memVT.isSimple() - ? getLLTForMVT(Info.memVT.getSimpleVT()) - : LLT::scalar(Info.memVT.getStoreSizeInBits()); + if (TgtMemIntrinsicInfo) { + const Function *F = CB.getCalledFunction(); + + Align Alignment = TgtMemIntrinsicInfo->align.value_or(DL->getABITypeAlign( + TgtMemIntrinsicInfo->memVT.getTypeForEVT(F->getContext()))); + LLT MemTy = + TgtMemIntrinsicInfo->memVT.isSimple() + ? getLLTForMVT(TgtMemIntrinsicInfo->memVT.getSimpleVT()) + : LLT::scalar(TgtMemIntrinsicInfo->memVT.getStoreSizeInBits()); // TODO: We currently just fallback to address space 0 if getTgtMemIntrinsic // didn't yield anything useful. MachinePointerInfo MPI; - if (Info.ptrVal) - MPI = MachinePointerInfo(Info.ptrVal, Info.offset); - else if (Info.fallbackAddressSpace) - MPI = MachinePointerInfo(*Info.fallbackAddressSpace); + if (TgtMemIntrinsicInfo->ptrVal) { + MPI = MachinePointerInfo(TgtMemIntrinsicInfo->ptrVal, + TgtMemIntrinsicInfo->offset); + } else if (TgtMemIntrinsicInfo->fallbackAddressSpace) { + MPI = MachinePointerInfo(*TgtMemIntrinsicInfo->fallbackAddressSpace); + } MIB.addMemOperand(MF->getMachineMemOperand( - MPI, Info.flags, MemTy, Alignment, CI.getAAMetadata(), - /*Ranges=*/nullptr, Info.ssid, Info.order, Info.failureOrder)); + MPI, TgtMemIntrinsicInfo->flags, MemTy, Alignment, CB.getAAMetadata(), + /*Ranges=*/nullptr, TgtMemIntrinsicInfo->ssid, + TgtMemIntrinsicInfo->order, TgtMemIntrinsicInfo->failureOrder)); } - if (CI.isConvergent()) { - if (auto Bundle = CI.getOperandBundle(LLVMContext::OB_convergencectrl)) { + if (CB.isConvergent()) { + if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_convergencectrl)) { auto *Token = Bundle->Inputs[0].get(); Register TokenReg = getOrCreateVReg(*Token); MIB.addUse(TokenReg, RegState::Implicit); diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 4795d81..434a579 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -1161,6 +1161,8 @@ bool MIParser::parse(MachineInstr *&MI) { MemOperands.push_back(MemOp); if (Token.isNewlineOrEOF()) break; + if (OpCode == TargetOpcode::BUNDLE && Token.is(MIToken::lbrace)) + break; if (Token.isNot(MIToken::comma)) return error("expected ',' before the next machine memory operand"); lex(); diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index 8ad9245..37e5c51 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ 
b/llvm/lib/CodeGen/MachineInstr.cpp @@ -1547,10 +1547,14 @@ bool MachineInstr::mayAlias(BatchAAResults *AA, const MachineInstr &Other, // Check each pair of memory operands from both instructions, which can't // alias only if all pairs won't alias. - for (auto *MMOa : memoperands()) - for (auto *MMOb : Other.memoperands()) + for (auto *MMOa : memoperands()) { + for (auto *MMOb : Other.memoperands()) { + if (!MMOa->isStore() && !MMOb->isStore()) + continue; if (MemOperandsHaveAlias(MFI, AA, UseTBAA, MMOa, MMOb)) return true; + } + } return false; } diff --git a/llvm/lib/CodeGen/MachineInstrBundle.cpp b/llvm/lib/CodeGen/MachineInstrBundle.cpp index f4c1a8b..fa654f2 100644 --- a/llvm/lib/CodeGen/MachineInstrBundle.cpp +++ b/llvm/lib/CodeGen/MachineInstrBundle.cpp @@ -143,6 +143,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, SmallSet<Register, 8> KilledUseSet; SmallSet<Register, 8> UndefUseSet; SmallVector<std::pair<Register, Register>> TiedOperands; + SmallVector<MachineInstr *> MemMIs; for (auto MII = FirstMI; MII != LastMI; ++MII) { // Debug instructions have no effects to track. if (MII->isDebugInstr()) @@ -206,6 +207,9 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, MIB.setMIFlag(MachineInstr::FrameSetup); if (MII->getFlag(MachineInstr::FrameDestroy)) MIB.setMIFlag(MachineInstr::FrameDestroy); + + if (MII->mayLoadOrStore()) + MemMIs.push_back(&*MII); } for (Register Reg : LocalDefs) { @@ -231,6 +235,8 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, assert(UseIdx < ExternUses.size()); MIB->tieOperands(DefIdx, LocalDefs.size() + UseIdx); } + + MIB->cloneMergedMemRefs(MF, MemMIs); } /// finalizeBundle - Same functionality as the previous finalizeBundle except diff --git a/llvm/lib/CodeGen/SafeStack.cpp b/llvm/lib/CodeGen/SafeStack.cpp index e9ffa85..6b747f3 100644 --- a/llvm/lib/CodeGen/SafeStack.cpp +++ b/llvm/lib/CodeGen/SafeStack.cpp @@ -196,8 +196,6 @@ public: bool run(); }; -constexpr Align SafeStack::StackAlignment; - uint64_t SafeStack::getStaticAllocaAllocationSize(const AllocaInst* AI) { uint64_t Size = DL.getTypeAllocSize(AI->getAllocatedType()); if (AI->isArrayAllocation()) { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index fa0c899..2f598b2 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3526,8 +3526,7 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) { // Update successor info. 
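The early continue added to MachineInstr::mayAlias above encodes a standard rule: a pair of memory operands can only constrain reordering if at least one of them writes, so load/load pairs skip the comparatively expensive alias query entirely. A trivial scalar model of the predicate:

#include <cassert>

// A pair of accesses needs an alias check only if at least one is a store.
static bool pairNeedsAliasCheck(bool AIsStore, bool BIsStore) {
  return AIsStore || BIsStore;
}

int main() {
  assert(!pairNeedsAliasCheck(false, false)); // load vs load: skipped
  assert(pairNeedsAliasCheck(true, false));   // store vs load: checked
  assert(pairNeedsAliasCheck(true, true));    // store vs store: checked
  return 0;
}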
addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne()); - for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i) { - BasicBlock *Dest = I.getIndirectDest(i); + for (BasicBlock *Dest : I.getIndirectDests()) { MachineBasicBlock *Target = FuncInfo.getMBB(Dest); Target->setIsInlineAsmBrIndirectTarget(); // If we introduce a type of asm goto statement that is permitted to use an @@ -4759,7 +4758,7 @@ void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) { SmallVector<uint64_t, 4> Offsets; const Value *SrcV = I.getOperand(0); ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), - SrcV->getType(), ValueVTs, &Offsets, 0); + SrcV->getType(), ValueVTs, /*MemVTs=*/nullptr, &Offsets, 0); assert(ValueVTs.size() == 1 && Offsets[0] == 0 && "expect a single EVT for swifterror"); @@ -4795,7 +4794,7 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) { SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Ty, - ValueVTs, &Offsets, 0); + ValueVTs, /*MemVTs=*/nullptr, &Offsets, 0); assert(ValueVTs.size() == 1 && Offsets[0] == 0 && "expect a single EVT for swifterror"); @@ -5313,18 +5312,26 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { DAG.setRoot(OutChain); } -/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC -/// node. -void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, - unsigned Intrinsic) { - // Ignore the callsite's attributes. A specific call site may be marked with - // readnone, but the lowering code will expect the chain based on the - // definition. +/// Check if this intrinsic call depends on the chain (1st return value) +/// and if it only *loads* memory. +/// Ignore the callsite's attributes. A specific call site may be marked with +/// readnone, but the lowering code will expect the chain based on the +/// definition. +std::pair<bool, bool> +SelectionDAGBuilder::getTargetIntrinsicCallProperties(const CallBase &I) { const Function *F = I.getCalledFunction(); bool HasChain = !F->doesNotAccessMemory(); bool OnlyLoad = HasChain && F->onlyReadsMemory() && F->willReturn() && F->doesNotThrow(); + return {HasChain, OnlyLoad}; +} + +SmallVector<SDValue, 8> SelectionDAGBuilder::getTargetIntrinsicOperands( + const CallBase &I, bool HasChain, bool OnlyLoad, + TargetLowering::IntrinsicInfo *TgtMemIntrinsicInfo) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + // Build the operand list. SmallVector<SDValue, 8> Ops; if (HasChain) { // If this intrinsic has side-effects, chainify it. @@ -5336,17 +5343,10 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, } } - // Info is set by getTgtMemIntrinsic - TargetLowering::IntrinsicInfo Info; - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, - DAG.getMachineFunction(), - Intrinsic); - // Add the intrinsic ID as an integer operand if it's not a target intrinsic. - if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || - Info.opc == ISD::INTRINSIC_W_CHAIN) - Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(), + if (!TgtMemIntrinsicInfo || TgtMemIntrinsicInfo->opc == ISD::INTRINSIC_VOID || + TgtMemIntrinsicInfo->opc == ISD::INTRINSIC_W_CHAIN) + Ops.push_back(DAG.getTargetConstant(I.getIntrinsicID(), getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); // Add all operands of the call to the operand list. 
@@ -5369,13 +5369,85 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, } } + if (std::optional<OperandBundleUse> Bundle = + I.getOperandBundle(LLVMContext::OB_convergencectrl)) { + Value *Token = Bundle->Inputs[0].get(); + SDValue ConvControlToken = getValue(Token); + assert(Ops.back().getValueType() != MVT::Glue && + "Did not expect another glue node here."); + ConvControlToken = + DAG.getNode(ISD::CONVERGENCECTRL_GLUE, {}, MVT::Glue, ConvControlToken); + Ops.push_back(ConvControlToken); + } + + return Ops; +} + +SDVTList SelectionDAGBuilder::getTargetIntrinsicVTList(const CallBase &I, + bool HasChain) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs); if (HasChain) ValueVTs.push_back(MVT::Other); - SDVTList VTs = DAG.getVTList(ValueVTs); + return DAG.getVTList(ValueVTs); +} + +/// Get an INTRINSIC node for a target intrinsic which does not touch memory. +SDValue SelectionDAGBuilder::getTargetNonMemIntrinsicNode( + const Type &IntrinsicVT, bool HasChain, ArrayRef<SDValue> Ops, + const SDVTList &VTs) { + if (!HasChain) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops); + if (!IntrinsicVT.isVoidTy()) + return DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops); + return DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops); +} + +/// Set root, convert return type if necessary and check alignment. +SDValue SelectionDAGBuilder::handleTargetIntrinsicRet(const CallBase &I, + bool HasChain, + bool OnlyLoad, + SDValue Result) { + if (HasChain) { + SDValue Chain = Result.getValue(Result.getNode()->getNumValues() - 1); + if (OnlyLoad) + PendingLoads.push_back(Chain); + else + DAG.setRoot(Chain); + } + + if (I.getType()->isVoidTy()) + return Result; + + if (MaybeAlign Alignment = I.getRetAlign(); InsertAssertAlign && Alignment) { + // Insert `assertalign` node if there's an alignment. + Result = DAG.getAssertAlign(getCurSDLoc(), Result, Alignment.valueOrOne()); + } else if (!isa<VectorType>(I.getType())) { + Result = lowerRangeToAssertZExt(DAG, I, Result); + } + + return Result; +} + +/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC +/// node. +void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, + unsigned Intrinsic) { + auto [HasChain, OnlyLoad] = getTargetIntrinsicCallProperties(I); + + // Info is set by getTgtMemIntrinsic + TargetLowering::IntrinsicInfo Info; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + bool IsTgtMemIntrinsic = + TLI.getTgtMemIntrinsic(Info, I, DAG.getMachineFunction(), Intrinsic); + + SmallVector<SDValue, 8> Ops = getTargetIntrinsicOperands( + I, HasChain, OnlyLoad, IsTgtMemIntrinsic ? &Info : nullptr); + SDVTList VTs = getTargetIntrinsicVTList(I, HasChain); // Propagate fast-math-flags from IR to node(s). SDNodeFlags Flags; @@ -5386,19 +5458,9 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // Create the node. SDValue Result; - if (auto Bundle = I.getOperandBundle(LLVMContext::OB_convergencectrl)) { - auto *Token = Bundle->Inputs[0].get(); - SDValue ConvControlToken = getValue(Token); - assert(Ops.back().getValueType() != MVT::Glue && - "Did not expected another glue node here."); - ConvControlToken = - DAG.getNode(ISD::CONVERGENCECTRL_GLUE, {}, MVT::Glue, ConvControlToken); - Ops.push_back(ConvControlToken); - } - // In some cases, custom collection of operands from CallInst I may be needed. 
TLI.CollectTargetIntrinsicOperands(I, Ops, DAG); - if (IsTgtIntrinsic) { + if (IsTgtMemIntrinsic) { // This is target intrinsic that touches memory // // TODO: We currently just fallback to address space 0 if getTgtMemIntrinsic @@ -5418,34 +5480,11 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, Info.ssid, Info.order, Info.failureOrder); Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, MemVT, MMO); - } else if (!HasChain) { - Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops); - } else if (!I.getType()->isVoidTy()) { - Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops); } else { - Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops); - } - - if (HasChain) { - SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1); - if (OnlyLoad) - PendingLoads.push_back(Chain); - else - DAG.setRoot(Chain); + Result = getTargetNonMemIntrinsicNode(*I.getType(), HasChain, Ops, VTs); } - if (!I.getType()->isVoidTy()) { - if (!isa<VectorType>(I.getType())) - Result = lowerRangeToAssertZExt(DAG, I, Result); - - MaybeAlign Alignment = I.getRetAlign(); - - // Insert `assertalign` node if there's an alignment. - if (InsertAssertAlign && Alignment) { - Result = - DAG.getAssertAlign(getCurSDLoc(), Result, Alignment.valueOrOne()); - } - } + Result = handleTargetIntrinsicRet(I, HasChain, OnlyLoad, Result); setValue(&I, Result); } @@ -7772,6 +7811,17 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, return; } + case Intrinsic::reloc_none: { + Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(0))->getMetadata(); + StringRef SymbolName = cast<MDString>(MD)->getString(); + SDValue Ops[2] = { + getRoot(), + DAG.getTargetExternalSymbol( + SymbolName.data(), TLI.getProgramPointerTy(DAG.getDataLayout()))}; + DAG.setRoot(DAG.getNode(ISD::RELOC_NONE, sdl, MVT::Other, Ops)); + return; + } + case Intrinsic::eh_exceptionpointer: case Intrinsic::eh_exceptioncode: { // Get the exception pointer vreg, copy from it, and resize it to fit. 
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 47e19f7..ed63bee 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -727,6 +727,17 @@ private: MCSymbol *&BeginLabel); SDValue lowerEndEH(SDValue Chain, const InvokeInst *II, const BasicBlock *EHPadBB, MCSymbol *BeginLabel); + + std::pair<bool, bool> getTargetIntrinsicCallProperties(const CallBase &I); + SmallVector<SDValue, 8> getTargetIntrinsicOperands( + const CallBase &I, bool HasChain, bool OnlyLoad, + TargetLowering::IntrinsicInfo *TgtMemIntrinsicInfo = nullptr); + SDVTList getTargetIntrinsicVTList(const CallBase &I, bool HasChain); + SDValue getTargetNonMemIntrinsicNode(const Type &IntrinsicVT, bool HasChain, + ArrayRef<SDValue> Ops, + const SDVTList &VTs); + SDValue handleTargetIntrinsicRet(const CallBase &I, bool HasChain, + bool OnlyLoad, SDValue Result); }; /// This struct represents the registers (physical or virtual) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 77377d3..d3e1628 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -472,6 +472,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::LIFETIME_END: return "lifetime.end"; case ISD::FAKE_USE: return "fake_use"; + case ISD::RELOC_NONE: + return "reloc_none"; case ISD::PSEUDO_PROBE: return "pseudoprobe"; case ISD::GC_TRANSITION_START: return "gc_transition.start"; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 6c11c5b..8bc5d2f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -2550,6 +2550,11 @@ void SelectionDAGISel::Select_FAKE_USE(SDNode *N) { N->getOperand(1), N->getOperand(0)); } +void SelectionDAGISel::Select_RELOC_NONE(SDNode *N) { + CurDAG->SelectNodeTo(N, TargetOpcode::RELOC_NONE, N->getValueType(0), + N->getOperand(1), N->getOperand(0)); +} + void SelectionDAGISel::Select_FREEZE(SDNode *N) { // TODO: We don't have FREEZE pseudo-instruction in MachineInstr-level now. // If FREEZE instruction is added later, the code below must be changed as @@ -3325,6 +3330,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, case ISD::FAKE_USE: Select_FAKE_USE(NodeToMatch); return; + case ISD::RELOC_NONE: + Select_RELOC_NONE(NodeToMatch); + return; case ISD::FREEZE: Select_FREEZE(NodeToMatch); return; diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index 414e414..b99e1c7 100644 --- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1665,6 +1665,17 @@ void TwoAddressInstructionImpl::processTiedPairs(MachineInstr *MI, // by SubRegB is compatible with RegA with no subregister. So regardless of // whether the dest oper writes a subreg, the source oper should not. MO.setSubReg(0); + + // Update uses of RegB to uses of RegA inside the bundle. 
+ if (MI->isBundle()) { + for (MachineOperand &MO : mi_bundle_ops(*MI)) { + if (MO.isReg() && MO.getReg() == RegB) { + assert(MO.getSubReg() == 0 && SubRegB == 0 && + "tied subregister uses in bundled instructions not supported"); + MO.setReg(RegA); + } + } + } } if (AllUsesCopied) { diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp index fafc325..a98e925 100644 --- a/llvm/lib/IR/DebugInfoMetadata.cpp +++ b/llvm/lib/IR/DebugInfoMetadata.cpp @@ -962,16 +962,29 @@ DIType *DIDerivedType::getClassType() const { assert(getTag() == dwarf::DW_TAG_ptr_to_member_type); return cast_or_null<DIType>(getExtraData()); } + +// Helper function to extract ConstantAsMetadata from ExtraData, +// handling extra data MDTuple unwrapping if needed. +static ConstantAsMetadata *extractConstantMetadata(Metadata *ExtraData) { + Metadata *ED = ExtraData; + if (auto *Tuple = dyn_cast_or_null<MDTuple>(ED)) { + if (Tuple->getNumOperands() != 1) + return nullptr; + ED = Tuple->getOperand(0); + } + return cast_or_null<ConstantAsMetadata>(ED); +} + uint32_t DIDerivedType::getVBPtrOffset() const { assert(getTag() == dwarf::DW_TAG_inheritance); - if (auto *CM = cast_or_null<ConstantAsMetadata>(getExtraData())) + if (auto *CM = extractConstantMetadata(getExtraData())) if (auto *CI = dyn_cast_or_null<ConstantInt>(CM->getValue())) return static_cast<uint32_t>(CI->getZExtValue()); return 0; } Constant *DIDerivedType::getStorageOffsetInBits() const { assert(getTag() == dwarf::DW_TAG_member && isBitField()); - if (auto *C = cast_or_null<ConstantAsMetadata>(getExtraData())) + if (auto *C = extractConstantMetadata(getExtraData())) return C->getValue(); return nullptr; } @@ -980,13 +993,13 @@ Constant *DIDerivedType::getConstant() const { assert((getTag() == dwarf::DW_TAG_member || getTag() == dwarf::DW_TAG_variable) && isStaticMember()); - if (auto *C = cast_or_null<ConstantAsMetadata>(getExtraData())) + if (auto *C = extractConstantMetadata(getExtraData())) return C->getValue(); return nullptr; } Constant *DIDerivedType::getDiscriminantValue() const { assert(getTag() == dwarf::DW_TAG_member && !isStaticMember()); - if (auto *C = cast_or_null<ConstantAsMetadata>(getExtraData())) + if (auto *C = extractConstantMetadata(getExtraData())) return C->getValue(); return nullptr; } diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 24f90bf..f1e473a 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -6013,6 +6013,12 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { Check(cast<ConstantInt>(Call.getArgOperand(3))->getZExtValue() < 2, "cache type argument to llvm.prefetch must be 0-1", Call); break; + case Intrinsic::reloc_none: { + Check(isa<MDString>( + cast<MetadataAsValue>(Call.getArgOperand(0))->getMetadata()), + "llvm.reloc.none argument must be a metadata string", &Call); + break; + } case Intrinsic::stackprotector: Check(isa<AllocaInst>(Call.getArgOperand(1)->stripPointerCasts()), "llvm.stackprotector parameter #2 must resolve to an alloca.", Call); diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index 0208735..5498787 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -1690,7 +1690,7 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) { IndexedInstrProf::ProfVersion::CurrentVersion) return make_error<InstrProfError>(instrprof_error::unsupported_version); - static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version12, + 
static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version13, "Please update the reader as needed when a new field is added " "or when indexed profile version gets bumped."); @@ -1723,10 +1723,11 @@ size_t Header::size() const { // of the header, and byte offset of existing fields shouldn't change when // indexed profile version gets incremented. static_assert( - IndexedInstrProf::ProfVersion::CurrentVersion == Version12, + IndexedInstrProf::ProfVersion::CurrentVersion == Version13, "Please update the size computation below if a new field has " "been added to the header; for a version bump without new " "fields, add a case statement to fall through to the latest version."); + case 13ull: case 12ull: return 72; case 11ull: diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index a347351..0f15ca8 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -542,7 +542,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { // The WritePrevVersion handling will either need to be removed or updated // if the version is advanced beyond 12. static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == - IndexedInstrProf::ProfVersion::Version12); + IndexedInstrProf::ProfVersion::Version13); if (static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation)) Header.Version |= VARIANT_MASK_IR_PROF; if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive)) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index d08f9b9..40e6400 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -50,6 +50,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SDPatternMatch.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetCallingConv.h" @@ -104,7 +105,6 @@ #include <vector> using namespace llvm; -using namespace llvm::PatternMatch; #define DEBUG_TYPE "aarch64-lower" @@ -1174,6 +1174,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::SHL); setTargetDAGCombine(ISD::VECTOR_DEINTERLEAVE); + setTargetDAGCombine(ISD::CTPOP); // In case of strict alignment, avoid an excessive number of byte wide stores. MaxStoresPerMemsetOptSize = 8; @@ -11330,9 +11331,10 @@ SDValue AArch64TargetLowering::LowerMinMax(SDValue Op, break; } + // Note: This lowering only overrides NEON for v1i64 and v2i64, where we + // prefer using SVE if available. if (VT.isScalableVector() || - useSVEForFixedLengthVectorVT( - VT, /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors())) { + useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true)) { switch (Opcode) { default: llvm_unreachable("Wrong instruction"); @@ -17554,6 +17556,7 @@ bool AArch64TargetLowering::optimizeExtendOrTruncateConversion( // udot instruction. if (SrcWidth * 4 <= DstWidth) { if (all_of(I->users(), [&](auto *U) { + using namespace llvm::PatternMatch; auto *SingleUser = cast<Instruction>(&*U); if (match(SingleUser, m_c_Mul(m_Specific(I), m_SExt(m_Value())))) return true; @@ -17825,6 +17828,7 @@ bool AArch64TargetLowering::lowerInterleavedLoad( // into shift / and masks. For the moment we do this just for uitofp (not // zext) to avoid issues with widening instructions. 
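Returning to the IndexedInstrProf changes above: Version13 introduces no new header fields, so Header::size() falls through to the Version12 size via the new case 13ull label. A self-contained model of that fall-through (the 72-byte figure comes from the case table above; older versions elided):

#include <cassert>
#include <cstddef>
#include <cstdint>

// Version 13 adds no header fields, so its on-disk header size equals
// version 12's.
static std::size_t headerSize(std::uint64_t Version) {
  switch (Version) {
  case 13: // fall through: no new fields in version 13
  case 12:
    return 72;
  default:
    return 0; // older versions omitted in this sketch
  }
}

int main() {
  assert(headerSize(13) == headerSize(12));
  return 0;
}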
if (Shuffles.size() == 4 && all_of(Shuffles, [](ShuffleVectorInst *SI) { + using namespace llvm::PatternMatch; return SI->hasOneUse() && match(SI->user_back(), m_UIToFP(m_Value())) && SI->getType()->getScalarSizeInBits() * 4 == SI->user_back()->getType()->getScalarSizeInBits(); @@ -27841,6 +27845,35 @@ static SDValue performRNDRCombine(SDNode *N, SelectionDAG &DAG) { {A, DAG.getZExtOrTrunc(B, DL, MVT::i1), A.getValue(2)}, DL); } +static SDValue performCTPOPCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + SelectionDAG &DAG) { + using namespace llvm::SDPatternMatch; + if (!DCI.isBeforeLegalize()) + return SDValue(); + + // ctpop(zext(bitcast(vector_mask))) -> neg(signed_reduce_add(vector_mask)) + SDValue Mask; + if (!sd_match(N->getOperand(0), m_ZExt(m_BitCast(m_Value(Mask))))) + return SDValue(); + + EVT VT = N->getValueType(0); + EVT MaskVT = Mask.getValueType(); + + if (VT.isVector() || !MaskVT.isFixedLengthVector() || + MaskVT.getVectorElementType() != MVT::i1) + return SDValue(); + + EVT ReduceInVT = + EVT::getVectorVT(*DAG.getContext(), VT, MaskVT.getVectorElementCount()); + + SDLoc DL(N); + // Sign extend to best fit ZeroOrNegativeOneBooleanContent. + SDValue ExtMask = DAG.getNode(ISD::SIGN_EXTEND, DL, ReduceInVT, Mask); + SDValue NegPopCount = DAG.getNode(ISD::VECREDUCE_ADD, DL, VT, ExtMask); + return DAG.getNegative(NegPopCount, DL, VT); +} + SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -28186,6 +28219,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, return performScalarToVectorCombine(N, DCI, DAG); case ISD::SHL: return performSHLCombine(N, DCI, DAG); + case ISD::CTPOP: + return performCTPOPCombine(N, DCI, DAG); } return SDValue(); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 1b559a6..f5081a9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -1248,7 +1248,8 @@ void AMDGPUTargetLowering::analyzeFormalArgumentsCompute( SmallVector<EVT, 16> ValueVTs; SmallVector<uint64_t, 16> Offsets; - ComputeValueVTs(*this, DL, BaseArgTy, ValueVTs, &Offsets, ArgOffset); + ComputeValueVTs(*this, DL, BaseArgTy, ValueVTs, /*MemVTs=*/nullptr, + &Offsets, ArgOffset); for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; ++Value) { diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 9460145..6ce18ea 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3917,6 +3917,9 @@ bool SIInstrInfo::areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, if (isLDSDMA(MIa) || isLDSDMA(MIb)) return false; + if (MIa.isBundle() || MIb.isBundle()) + return false; + // TODO: Should we check the address space from the MachineMemOperand? 
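The performCTPOPCombine added above rewrites ctpop(zext(bitcast(<N x i1> mask))) into neg(vecreduce_add(sext mask)): under ZeroOrNegativeOneBooleanContent each set lane sign-extends to -1, so summing the lanes yields minus the population count. A scalar model of that identity:

#include <cassert>
#include <vector>

// Each set lane contributes -1 after sign extension, so negating the sum
// of the lanes gives the population count of the mask.
static int popcountViaSignedSum(const std::vector<bool> &Mask) {
  int Sum = 0;
  for (bool Lane : Mask)
    Sum += Lane ? -1 : 0; // sext(i1 true) == -1
  return -Sum;
}

int main() {
  assert(popcountViaSignedSum({true, false, true, true}) == 3);
  assert(popcountViaSignedSum({false, false}) == 0);
  return 0;
}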
That // would allow us to distinguish objects we know don't alias based on the // underlying address space, even if it was lowered to a different one, diff --git a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp index 8c7bc2f..81303fa 100644 --- a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp +++ b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp @@ -97,7 +97,6 @@ #define DEBUG_TYPE "bpf-abstract-member-access" namespace llvm { -constexpr StringRef BPFCoreSharedInfo::AmaAttr; uint32_t BPFCoreSharedInfo::SeqNum; Instruction *BPFCoreSharedInfo::insertPassThrough(Module *M, BasicBlock *BB, diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp index 6e5520c..3c61216 100644 --- a/llvm/lib/Target/BPF/BPFISelLowering.cpp +++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp @@ -803,26 +803,6 @@ SDValue BPFTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { return getAddr(N, DAG); } -const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const { - switch ((BPFISD::NodeType)Opcode) { - case BPFISD::FIRST_NUMBER: - break; - case BPFISD::RET_GLUE: - return "BPFISD::RET_GLUE"; - case BPFISD::CALL: - return "BPFISD::CALL"; - case BPFISD::SELECT_CC: - return "BPFISD::SELECT_CC"; - case BPFISD::BR_CC: - return "BPFISD::BR_CC"; - case BPFISD::Wrapper: - return "BPFISD::Wrapper"; - case BPFISD::MEMCPY: - return "BPFISD::MEMCPY"; - } - return nullptr; -} - static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags) { return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(), diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h index 5243d49..3d6e7c7 100644 --- a/llvm/lib/Target/BPF/BPFISelLowering.h +++ b/llvm/lib/Target/BPF/BPFISelLowering.h @@ -20,17 +20,6 @@ namespace llvm { class BPFSubtarget; -namespace BPFISD { -enum NodeType : unsigned { - FIRST_NUMBER = ISD::BUILTIN_OP_END, - RET_GLUE, - CALL, - SELECT_CC, - BR_CC, - Wrapper, - MEMCPY -}; -} class BPFTargetLowering : public TargetLowering { public: @@ -39,9 +28,6 @@ public: // Provide custom lowering hooks for some operations. SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; - // This method returns the name of a target specific DAG node. - const char *getTargetNodeName(unsigned Opcode) const override; - // This method decides whether folding a constant offset // with the given GlobalAddress is legal. 
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td index 51c32b2..bdacf9c 100644 --- a/llvm/lib/Target/BPF/BPFInstrInfo.td +++ b/llvm/lib/Target/BPF/BPFInstrInfo.td @@ -41,14 +41,12 @@ def BPFcallseq_start: SDNode<"ISD::CALLSEQ_START", SDT_BPFCallSeqStart, [SDNPHasChain, SDNPOutGlue]>; def BPFcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_BPFCallSeqEnd, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; -def BPFbrcc : SDNode<"BPFISD::BR_CC", SDT_BPFBrCC, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue]>; +def BPFbrcc : SDNode<"BPFISD::BR_CC", SDT_BPFBrCC, [SDNPHasChain]>; def BPFselectcc : SDNode<"BPFISD::SELECT_CC", SDT_BPFSelectCC>; def BPFWrapper : SDNode<"BPFISD::Wrapper", SDT_BPFWrapper>; def BPFmemcpy : SDNode<"BPFISD::MEMCPY", SDT_BPFMEMCPY, - [SDNPHasChain, SDNPInGlue, SDNPOutGlue, - SDNPMayStore, SDNPMayLoad]>; + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; def BPFIsLittleEndian : Predicate<"Subtarget->isLittleEndian()">; def BPFIsBigEndian : Predicate<"!Subtarget->isLittleEndian()">; def BPFHasALU32 : Predicate<"Subtarget->getHasAlu32()">; diff --git a/llvm/lib/Target/BPF/BPFPreserveDIType.cpp b/llvm/lib/Target/BPF/BPFPreserveDIType.cpp index d3b0c02..6a11ea6 100644 --- a/llvm/lib/Target/BPF/BPFPreserveDIType.cpp +++ b/llvm/lib/Target/BPF/BPFPreserveDIType.cpp @@ -27,10 +27,6 @@ #define DEBUG_TYPE "bpf-preserve-di-type" -namespace llvm { -constexpr StringRef BPFCoreSharedInfo::TypeIdAttr; -} // namespace llvm - using namespace llvm; namespace { diff --git a/llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp b/llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp index 3e29e6c..0e6d35d 100644 --- a/llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp +++ b/llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp @@ -10,12 +10,20 @@ // //===----------------------------------------------------------------------===// +#include "BPFSelectionDAGInfo.h" #include "BPFTargetMachine.h" #include "llvm/CodeGen/SelectionDAG.h" + +#define GET_SDNODE_DESC +#include "BPFGenSDNodeInfo.inc" + using namespace llvm; #define DEBUG_TYPE "bpf-selectiondag-info" +BPFSelectionDAGInfo::BPFSelectionDAGInfo() + : SelectionDAGGenTargetInfo(BPFGenSDNodeInfo) {} + SDValue BPFSelectionDAGInfo::EmitTargetCodeForMemcpy( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, @@ -31,11 +39,7 @@ SDValue BPFSelectionDAGInfo::EmitTargetCodeForMemcpy( if (StoresNumEstimate > getCommonMaxStoresPerMemFunc()) return SDValue(); - SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); - - Dst = DAG.getNode(BPFISD::MEMCPY, dl, VTs, Chain, Dst, Src, - DAG.getConstant(CopyLen, dl, MVT::i64), - DAG.getConstant(Alignment.value(), dl, MVT::i64)); - - return Dst.getValue(0); + return DAG.getNode(BPFISD::MEMCPY, dl, MVT::Other, Chain, Dst, Src, + DAG.getConstant(CopyLen, dl, MVT::i64), + DAG.getConstant(Alignment.value(), dl, MVT::i64)); } diff --git a/llvm/lib/Target/BPF/BPFSelectionDAGInfo.h b/llvm/lib/Target/BPF/BPFSelectionDAGInfo.h index 79f05e5..7345d2d 100644 --- a/llvm/lib/Target/BPF/BPFSelectionDAGInfo.h +++ b/llvm/lib/Target/BPF/BPFSelectionDAGInfo.h @@ -15,10 +15,15 @@ #include "llvm/CodeGen/SelectionDAGTargetInfo.h" +#define GET_SDNODE_ENUM +#include "BPFGenSDNodeInfo.inc" + namespace llvm { -class BPFSelectionDAGInfo : public SelectionDAGTargetInfo { +class BPFSelectionDAGInfo : public SelectionDAGGenTargetInfo { public: + BPFSelectionDAGInfo(); + SDValue 
EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, @@ -27,9 +32,8 @@ public: MachinePointerInfo SrcPtrInfo) const override; unsigned getCommonMaxStoresPerMemFunc() const { return 128; } - }; -} +} // namespace llvm #endif diff --git a/llvm/lib/Target/BPF/CMakeLists.txt b/llvm/lib/Target/BPF/CMakeLists.txt index 3678f13..fa539a0 100644 --- a/llvm/lib/Target/BPF/CMakeLists.txt +++ b/llvm/lib/Target/BPF/CMakeLists.txt @@ -10,6 +10,7 @@ tablegen(LLVM BPFGenDisassemblerTables.inc -gen-disassembler) tablegen(LLVM BPFGenInstrInfo.inc -gen-instr-info) tablegen(LLVM BPFGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM BPFGenRegisterInfo.inc -gen-register-info) +tablegen(LLVM BPFGenSDNodeInfo.inc -gen-sd-node-info) tablegen(LLVM BPFGenSubtargetInfo.inc -gen-subtarget) tablegen(LLVM BPFGenGlobalISel.inc -gen-global-isel) tablegen(LLVM BPFGenRegisterBank.inc -gen-register-bank) diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp index d507d71..9f1616f 100644 --- a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp +++ b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp @@ -304,40 +304,76 @@ bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) { GEPOperator *GOp = cast<GEPOperator>(&GEPI); Value *PtrOperand = GOp->getPointerOperand(); Type *NewGEPType = GOp->getSourceElementType(); - bool NeedsTransform = false; // Unwrap GEP ConstantExprs to find the base operand and element type - while (auto *CE = dyn_cast<ConstantExpr>(PtrOperand)) { - if (auto *GEPCE = dyn_cast<GEPOperator>(CE)) { - GOp = GEPCE; - PtrOperand = GEPCE->getPointerOperand(); - NewGEPType = GEPCE->getSourceElementType(); - } else - break; + while (auto *GEPCE = dyn_cast_or_null<GEPOperator>( + dyn_cast<ConstantExpr>(PtrOperand))) { + GOp = GEPCE; + PtrOperand = GEPCE->getPointerOperand(); + NewGEPType = GEPCE->getSourceElementType(); } + Type *const OrigGEPType = NewGEPType; + Value *const OrigOperand = PtrOperand; + if (GlobalVariable *NewGlobal = lookupReplacementGlobal(PtrOperand)) { NewGEPType = NewGlobal->getValueType(); PtrOperand = NewGlobal; - NeedsTransform = true; } else if (AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrOperand)) { Type *AllocatedType = Alloca->getAllocatedType(); if (isa<ArrayType>(AllocatedType) && - AllocatedType != GOp->getResultElementType()) { + AllocatedType != GOp->getResultElementType()) NewGEPType = AllocatedType; - NeedsTransform = true; + } else + return false; // Only GEPs into an alloca or global variable are considered + + // Defer changing i8 GEP types until dxil-flatten-arrays + if (OrigGEPType->isIntegerTy(8)) + NewGEPType = OrigGEPType; + + // If the original type is a "sub-type" of the new type, then ensure the gep + // correctly zero-indexes the extra dimensions to keep the offset calculation + // correct. + // Eg: + // i32, [4 x i32] and [8 x [4 x i32]] are sub-types of [8 x [4 x i32]], etc. + // + // So then: + // gep [4 x i32] %idx + // -> gep [8 x [4 x i32]], i32 0, i32 %idx + // gep i32 %idx + // -> gep [8 x [4 x i32]], i32 0, i32 0, i32 %idx + uint32_t MissingDims = 0; + Type *SubType = NewGEPType; + + // The new type will be in its array version; so match accordingly. 
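Before the dimension-walking loop that follows: its job is to count how many array layers (MissingDims) separate the GEP's original source type from the replacement type, so that many leading zero indices can be prepended and the byte offset stays unchanged. A scalar model of the resulting index rewrite (padIndices is illustrative only):

#include <cstdint>
#include <vector>

// E.g. rewriting `gep i32, ptr @g, i32 %i` against [8 x [4 x i32]] gives
// MissingDims == 2, so the new index list becomes {0, 0, %i}.
static std::vector<int64_t> padIndices(unsigned MissingDims,
                                       const std::vector<int64_t> &OrigIdx) {
  std::vector<int64_t> Out(MissingDims, 0); // leading zero indices
  Out.insert(Out.end(), OrigIdx.begin(), OrigIdx.end());
  return Out;
}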
+ Type *const GEPArrType = equivalentArrayTypeFromVector(OrigGEPType); + + while (SubType != GEPArrType) { + MissingDims++; + + ArrayType *ArrType = dyn_cast<ArrayType>(SubType); + if (!ArrType) { + assert(SubType == GEPArrType && + "GEP uses an invalid DXIL sub-type of alloca/global variable"); + break; } + + SubType = ArrType->getElementType(); } + bool NeedsTransform = OrigOperand != PtrOperand || + OrigGEPType != NewGEPType || MissingDims != 0; + if (!NeedsTransform) return false; - // Keep scalar GEPs scalar; dxil-flatten-arrays will do flattening later - if (!isa<ArrayType>(GOp->getSourceElementType())) - NewGEPType = GOp->getSourceElementType(); - IRBuilder<> Builder(&GEPI); - SmallVector<Value *, MaxVecSize> Indices(GOp->indices()); + SmallVector<Value *, MaxVecSize> Indices; + + for (uint32_t I = 0; I < MissingDims; I++) + Indices.push_back(Builder.getInt32(0)); + llvm::append_range(Indices, GOp->indices()); + Value *NewGEP = Builder.CreateGEP(NewGEPType, PtrOperand, Indices, GOp->getName(), GOp->getNoWrapFlags()); diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp index ebb7c26..e0d2dbd 100644 --- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp +++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp @@ -197,6 +197,7 @@ static Value *expand16BitIsNormal(CallInst *Orig) { static bool isIntrinsicExpansion(Function &F) { switch (F.getIntrinsicID()) { + case Intrinsic::assume: case Intrinsic::abs: case Intrinsic::atan2: case Intrinsic::exp: @@ -988,6 +989,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) { case Intrinsic::abs: Result = expandAbs(Orig); break; + case Intrinsic::assume: + Orig->eraseFromParent(); + return true; case Intrinsic::atan2: Result = expandAtan2Intrinsic(Orig); break; diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp index 8720460..e46a393 100644 --- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp +++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp @@ -904,8 +904,6 @@ public: case Intrinsic::dx_resource_casthandle: // NOTE: llvm.dbg.value is supported as is in DXIL. case Intrinsic::dbg_value: - // NOTE: llvm.assume is supported as is in DXIL. - case Intrinsic::assume: case Intrinsic::not_intrinsic: if (F.use_empty()) F.eraseFromParent(); diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index 47726d6..55bafde 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -4753,6 +4753,17 @@ bool HexagonInstrInfo::getBundleNoShuf(const MachineInstr &MIB) const { return (Operand.isImm() && (Operand.getImm() & memShufDisabledMask) != 0); } +bool HexagonInstrInfo::isQFPMul(const MachineInstr *MI) const { + return (MI->getOpcode() == Hexagon::V6_vmpy_qf16_hf || + MI->getOpcode() == Hexagon::V6_vmpy_qf16_mix_hf || + MI->getOpcode() == Hexagon::V6_vmpy_qf32_hf || + MI->getOpcode() == Hexagon::V6_vmpy_qf32_mix_hf || + MI->getOpcode() == Hexagon::V6_vmpy_qf32_sf || + MI->getOpcode() == Hexagon::V6_vmpy_qf16 || + MI->getOpcode() == Hexagon::V6_vmpy_qf32_qf16 || + MI->getOpcode() == Hexagon::V6_vmpy_qf32); +} + // Addressing mode relations. short HexagonInstrInfo::changeAddrMode_abs_io(short Opc) const { return Opc >= 0 ?
Hexagon::changeAddrMode_abs_io(Opc) : Opc; diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h index c17e527..48adf82 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h @@ -532,6 +532,7 @@ public: } MCInst getNop() const override; + bool isQFPMul(const MachineInstr *MI) const; }; /// \brief Create RegSubRegPair from a register MachineOperand diff --git a/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp b/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp index f29a739..8801f69 100644 --- a/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp +++ b/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp @@ -58,7 +58,7 @@ // are PHI inst. // //===----------------------------------------------------------------------===// -#include <unordered_set> + #define HEXAGON_QFP_OPTIMIZER "QFP optimizer pass" #include "Hexagon.h" @@ -86,6 +86,9 @@ using namespace llvm; cl::opt<bool> DisableQFOptimizer("disable-qfp-opt", cl::init(false), cl::desc("Disable optimization of Qfloat operations.")); +cl::opt<bool> DisableQFOptForMul( + "disable-qfp-opt-mul", cl::init(true), + cl::desc("Disable optimization of Qfloat operations for multiply.")); namespace { const std::map<unsigned short, unsigned short> QFPInstMap{ {Hexagon::V6_vmpy_qf16_mix_hf, Hexagon::V6_vmpy_qf16}, {Hexagon::V6_vmpy_qf32_hf, Hexagon::V6_vmpy_qf32_mix_hf}, {Hexagon::V6_vmpy_qf32_mix_hf, Hexagon::V6_vmpy_qf32_qf16}, - {Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32}}; + {Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32}, + {Hexagon::V6_vilog2_sf, Hexagon::V6_vilog2_qf32}, + {Hexagon::V6_vilog2_hf, Hexagon::V6_vilog2_qf16}, + {Hexagon::V6_vabs_qf32_sf, Hexagon::V6_vabs_qf32_qf32}, + {Hexagon::V6_vabs_qf16_hf, Hexagon::V6_vabs_qf16_qf16}, + {Hexagon::V6_vneg_qf32_sf, Hexagon::V6_vneg_qf32_qf32}, + {Hexagon::V6_vneg_qf16_hf, Hexagon::V6_vneg_qf16_qf16}}; } // namespace -namespace { +namespace llvm { +FunctionPass *createHexagonQFPOptimizer(); +void initializeHexagonQFPOptimizerPass(PassRegistry &); +} // namespace llvm +namespace { struct HexagonQFPOptimizer : public MachineFunctionPass { public: static char ID; @@ -116,6 +129,10 @@ public: bool optimizeQfp(MachineInstr *MI, MachineBasicBlock *MBB); + bool optimizeQfpTwoOp(MachineInstr *MI, MachineBasicBlock *MBB); + + bool optimizeQfpOneOp(MachineInstr *MI, MachineBasicBlock *MBB); + StringRef getPassName() const override { return HEXAGON_QFP_OPTIMIZER; } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -142,19 +159,69 @@ FunctionPass *llvm::createHexagonQFPOptimizer() { bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI, MachineBasicBlock *MBB) { - // Early exit: - // - if instruction is invalid or has too few operands (QFP ops need 2 sources - // + 1 dest), - // - or does not have a transformation mapping.
- if (MI->getNumOperands() < 3) + if (MI->getNumOperands() == 2) + return optimizeQfpOneOp(MI, MBB); + else if (MI->getNumOperands() == 3) + return optimizeQfpTwoOp(MI, MBB); + else return false; +} + +bool HexagonQFPOptimizer::optimizeQfpOneOp(MachineInstr *MI, + MachineBasicBlock *MBB) { + + unsigned Op0F = 0; auto It = QFPInstMap.find(MI->getOpcode()); if (It == QFPInstMap.end()) return false; + unsigned short InstTy = It->second; + // Get the reaching defs of MI + MachineInstr *DefMI = MRI->getVRegDef(MI->getOperand(1).getReg()); + MachineOperand &Res = MI->getOperand(0); + if (!Res.isReg()) + return false; + + LLVM_DEBUG(dbgs() << "\n[Reaching Defs of operands]: "; DefMI->dump()); + MachineInstr *ReachDefDef = nullptr; + + // Get the reaching def of the reaching def to check for W reg def + if (DefMI->getNumOperands() > 1 && DefMI->getOperand(1).isReg() && + DefMI->getOperand(1).getReg().isVirtual()) + ReachDefDef = MRI->getVRegDef(DefMI->getOperand(1).getReg()); + unsigned ReachDefOp = DefMI->getOpcode(); + MachineInstrBuilder MIB; + + // Check if the reaching def is a conversion + if (ReachDefOp == Hexagon::V6_vconv_sf_qf32 || + ReachDefOp == Hexagon::V6_vconv_hf_qf16) { + + // Return if the reaching def of reaching def is W type + if (ReachDefDef && MRI->getRegClass(ReachDefDef->getOperand(0).getReg()) == + &Hexagon::HvxWRRegClass) + return false; + + // Analyze the use operands of the conversion to get their KILL status + MachineOperand &SrcOp = DefMI->getOperand(1); + Op0F = getKillRegState(SrcOp.isKill()); + SrcOp.setIsKill(false); + MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg()) + .addReg(SrcOp.getReg(), Op0F, SrcOp.getSubReg()); + LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump()); + return true; + } + return false; +} + +bool HexagonQFPOptimizer::optimizeQfpTwoOp(MachineInstr *MI, + MachineBasicBlock *MBB) { unsigned Op0F = 0; unsigned Op1F = 0; + auto It = QFPInstMap.find(MI->getOpcode()); + if (It == QFPInstMap.end()) + return false; + unsigned short InstTy = It->second; // Get the reaching defs of MI, DefMI1 and DefMI2 MachineInstr *DefMI1 = nullptr; MachineInstr *DefMI2 = nullptr; @@ -167,6 +234,9 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI, return false; MachineOperand &Res = MI->getOperand(0); + if (!Res.isReg()) + return false; + MachineInstr *Inst1 = nullptr; MachineInstr *Inst2 = nullptr; LLVM_DEBUG(dbgs() << "\n[Reaching Defs of operands]: "; DefMI1->dump(); DefMI2->dump()); @@ -185,7 +255,8 @@ unsigned Def2OP = DefMI2->getOpcode(); MachineInstrBuilder MIB; - // Case 1: Both reaching defs of MI are qf to sf/hf conversions + + // Check if both the reaching defs of MI are qf to sf/hf conversions if ((Def1OP == Hexagon::V6_vconv_sf_qf32 && Def2OP == Hexagon::V6_vconv_sf_qf32) || (Def1OP == Hexagon::V6_vconv_hf_qf16 && Def2OP == Hexagon::V6_vconv_hf_qf16)) { @@ -226,7 +297,7 @@ LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump()); return true; - // Case 2: Left operand is conversion to sf/hf + // Check if left operand's reaching def is a conversion to sf/hf } else if (((Def1OP == Hexagon::V6_vconv_sf_qf32 && Def2OP != Hexagon::V6_vconv_sf_qf32) || (Def1OP == Hexagon::V6_vconv_hf_qf16 && Def2OP != Hexagon::V6_vconv_hf_qf16)) && !DefMI2->isPHI() && (MI->getOpcode() != Hexagon::V6_vmpy_qf32_sf)) { @@ -250,7 +321,7 @@ LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump()); return true; - // Case 2: Left operand is conversion to sf/hf + // Check if right operand's reaching def is a conversion to
sf/hf } else if (((Def1OP != Hexagon::V6_vconv_sf_qf32 && Def2OP == Hexagon::V6_vconv_sf_qf32) || (Def1OP != Hexagon::V6_vconv_hf_qf16 && @@ -258,13 +329,6 @@ !DefMI1->isPHI() && (MI->getOpcode() != Hexagon::V6_vmpy_qf32_sf)) { // The second operand of original instruction is converted. - // In "mix" instructions, "qf" operand is always the first operand. - - // Caveat: vsub is not commutative w.r.t operands. - if (InstTy == Hexagon::V6_vsub_qf16_mix || - InstTy == Hexagon::V6_vsub_qf32_mix) - return false; - if (Inst2 && MRI->getRegClass(Inst2->getOperand(0).getReg()) == &Hexagon::HvxWRRegClass) return false; @@ -275,10 +339,26 @@ Op1F = getKillRegState(Src2.isKill()); Src2.setIsKill(false); Op0F = getKillRegState(Src1.isKill()); - MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg()) - .addReg(Src2.getReg(), Op1F, - Src2.getSubReg()) // Notice the operands are flipped. - .addReg(Src1.getReg(), Op0F, Src1.getSubReg()); + if (InstTy == Hexagon::V6_vsub_qf16_mix || + InstTy == Hexagon::V6_vsub_qf32_mix) { + if (!HST->useHVXV81Ops()) + // vsub_(hf|sf)_mix instructions are only available on HVX v81+ + return false; + // vsub is not commutative w.r.t. operands -> treat it as a special case + // to choose the correct mix instruction. + if (Def2OP == Hexagon::V6_vconv_sf_qf32) + InstTy = Hexagon::V6_vsub_sf_mix; + else if (Def2OP == Hexagon::V6_vconv_hf_qf16) + InstTy = Hexagon::V6_vsub_hf_mix; + MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg()) + .addReg(Src1.getReg(), Op0F, Src1.getSubReg()) + .addReg(Src2.getReg(), Op1F, Src2.getSubReg()); + } else { + MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg()) + .addReg(Src2.getReg(), Op1F, + Src2.getSubReg()) // Notice the operands are flipped. + .addReg(Src1.getReg(), Op0F, Src1.getSubReg()); + } LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump()); return true; } @@ -309,15 +389,18 @@ bool HexagonQFPOptimizer::runOnMachineFunction(MachineFunction &MF) { while (MII != MBBI->instr_end()) { MachineInstr *MI = &*MII; ++MII; // As MI might be removed.
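The vsub special case above exists because the generic mix rewrite flips the operand order (putting the qf operand first), which is only sound for commutative operations such as vadd and vmpy; for vsub the patch instead selects a dedicated vsub_(sf|hf)_mix opcode that preserves operand order. A trivial scalar reminder of why the flip is illegal for subtraction:

#include <cassert>

int main() {
  int X = 7, Y = 3;
  assert(X + Y == Y + X); // add commutes: flipping operands is safe
  assert(X - Y != Y - X); // sub does not: operand order must be preserved
  return 0;
}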
- - if (QFPInstMap.count(MI->getOpcode()) && - MI->getOpcode() != Hexagon::V6_vconv_sf_qf32 && - MI->getOpcode() != Hexagon::V6_vconv_hf_qf16) { - LLVM_DEBUG(dbgs() << "\n###Analyzing for removal: "; MI->dump()); - if (optimizeQfp(MI, MBB)) { - MI->eraseFromParent(); - LLVM_DEBUG(dbgs() << "\t....Removing...."); - Changed = true; + if (QFPInstMap.count(MI->getOpcode())) { + auto OpC = MI->getOpcode(); + if (DisableQFOptForMul && HII->isQFPMul(MI)) + continue; + if (OpC != Hexagon::V6_vconv_sf_qf32 && + OpC != Hexagon::V6_vconv_hf_qf16) { + LLVM_DEBUG(dbgs() << "\n###Analyzing for removal: "; MI->dump()); + if (optimizeQfp(MI, MBB)) { + MI->eraseFromParent(); + LLVM_DEBUG(dbgs() << "\t....Removing...."); + Changed = true; + } } } } diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 2f1a7ad..a3deb36 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -305,7 +305,8 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL, uint64_t StartingOffset = 0) { SmallVector<EVT, 16> TempVTs; SmallVector<uint64_t, 16> TempOffsets; - ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset); + ComputeValueVTs(TLI, DL, Ty, TempVTs, /*MemVTs=*/nullptr, &TempOffsets, + StartingOffset); for (const auto [VT, Off] : zip(TempVTs, TempOffsets)) { MVT RegisterVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT); diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 780e124..122738c 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -2750,6 +2750,10 @@ void PPCAIXAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) { if (isSpecialLLVMGlobalArrayToSkip(GV) || isSpecialLLVMGlobalArrayForStaticInit(GV)) return; + // Ignore non-emitted data. + if (GV->getSection() == "llvm.metadata") + return; + // If the Global Variable has the toc-data attribute, it needs to be emitted // when we emit the .toc section. if (GV->hasAttribute("toc-data")) { diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index b37b740..f881c4c 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -789,6 +789,8 @@ void RISCVFrameLowering::allocateStack(MachineBasicBlock &MBB, // Unroll the probe loop depending on the number of iterations. 
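The RISCVFrameLowering hunk that follows adds a CFAAdjust term. The reason: when only part of the frame is probed (Offset < RealStackSize), SP has already been moved by RealStackSize - Offset before the probe loop, so each intermediate def_cfa_offset must include that delta, and the final offset lands exactly on RealStackSize. A worked check with hypothetical sizes:

#include <cassert>
#include <cstdint>

int main() {
  // Hypothetical frame: 1024 bytes total, of which 512 are probed in
  // 256-byte steps; SP already moved by CFAAdjust before the loop.
  uint64_t RealStackSize = 1024, Offset = 512, ProbeSize = 256;
  uint64_t CFAAdjust = RealStackSize - Offset;
  uint64_t CurrentOffset = 0, CFAOffset = 0;
  while (CurrentOffset + ProbeSize <= Offset) {
    CurrentOffset += ProbeSize;
    CFAOffset = CurrentOffset + CFAAdjust; // emitted after each probe
  }
  // No residual remains here, and the last emitted CFA offset already
  // equals the full frame size that the residual path would emit.
  assert(Offset - CurrentOffset == 0 && CFAOffset == RealStackSize);
  return 0;
}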
if (Offset < ProbeSize * 5) { + uint64_t CFAAdjust = RealStackSize - Offset; + uint64_t CurrentOffset = 0; while (CurrentOffset + ProbeSize <= Offset) { RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, @@ -802,7 +804,7 @@ void RISCVFrameLowering::allocateStack(MachineBasicBlock &MBB, CurrentOffset += ProbeSize; if (EmitCFI) - CFIBuilder.buildDefCFAOffset(CurrentOffset); + CFIBuilder.buildDefCFAOffset(CurrentOffset + CFAAdjust); } uint64_t Residual = Offset - CurrentOffset; @@ -810,7 +812,7 @@ void RISCVFrameLowering::allocateStack(MachineBasicBlock &MBB, RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(-Residual), Flag, getStackAlign()); if (EmitCFI) - CFIBuilder.buildDefCFAOffset(Offset); + CFIBuilder.buildDefCFAOffset(RealStackSize); if (DynAllocation) { // s[d|w] zero, 0(sp) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 995ae75..3b69eda 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -17867,6 +17867,7 @@ static SDValue combineOp_VLToVWOp_VL(SDNode *N, SmallVector<SDNode *> Worklist; SmallPtrSet<SDNode *, 8> Inserted; + SmallPtrSet<SDNode *, 8> ExtensionsToRemove; Worklist.push_back(N); Inserted.insert(N); SmallVector<CombineResult> CombinesToApply; @@ -17876,22 +17877,25 @@ static SDValue combineOp_VLToVWOp_VL(SDNode *N, NodeExtensionHelper LHS(Root, 0, DAG, Subtarget); NodeExtensionHelper RHS(Root, 1, DAG, Subtarget); - auto AppendUsersIfNeeded = [&Worklist, &Subtarget, - &Inserted](const NodeExtensionHelper &Op) { - if (Op.needToPromoteOtherUsers()) { - for (SDUse &Use : Op.OrigOperand->uses()) { - SDNode *TheUser = Use.getUser(); - if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget)) - return false; - // We only support the first 2 operands of FMA. - if (Use.getOperandNo() >= 2) - return false; - if (Inserted.insert(TheUser).second) - Worklist.push_back(TheUser); - } - } - return true; - }; + auto AppendUsersIfNeeded = + [&Worklist, &Subtarget, &Inserted, + &ExtensionsToRemove](const NodeExtensionHelper &Op) { + if (Op.needToPromoteOtherUsers()) { + // Remember that we're supposed to remove this extension. + ExtensionsToRemove.insert(Op.OrigOperand.getNode()); + for (SDUse &Use : Op.OrigOperand->uses()) { + SDNode *TheUser = Use.getUser(); + if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget)) + return false; + // We only support the first 2 operands of FMA. + if (Use.getOperandNo() >= 2) + return false; + if (Inserted.insert(TheUser).second) + Worklist.push_back(TheUser); + } + } + return true; + }; // Control the compile time by limiting the number of nodes we look at in // total. @@ -17912,6 +17916,15 @@ static SDValue combineOp_VLToVWOp_VL(SDNode *N, std::optional<CombineResult> Res = FoldingStrategy(Root, LHS, RHS, DAG, Subtarget); if (Res) { + // If this strategy wouldn't remove an extension we're supposed to + // remove, reject it.
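// [Editor's aside, not part of the patch] A standalone model of the
// rejection rule added here: once AppendUsersIfNeeded records an
// extension in ExtensionsToRemove, any folding strategy that does not
// actually fold that extension must be skipped, otherwise the extend
// would survive with only some of its users widened. The types and
// node ids below are invented stand-ins, not the SelectionDAG API.
#include <cstdio>
#include <set>

struct CombineResultModel {
  bool FoldsLHSExt; // models Res->LHSExt.has_value()
  bool FoldsRHSExt; // models Res->RHSExt.has_value()
};

int main() {
  std::set<int> ExtensionsToRemove = {42}; // node ids we promised to fold
  const int LHSId = 42, RHSId = 7;
  const CombineResultModel Res{false, true}; // folds RHS ext but not LHS
  const bool Reject =
      (!Res.FoldsLHSExt && ExtensionsToRemove.count(LHSId)) ||
      (!Res.FoldsRHSExt && ExtensionsToRemove.count(RHSId));
  std::printf("%s\n", Reject ? "reject strategy" : "accept strategy");
}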
+ if (!Res->LHSExt.has_value() && + ExtensionsToRemove.contains(LHS.OrigOperand.getNode())) + continue; + if (!Res->RHSExt.has_value() && + ExtensionsToRemove.contains(RHS.OrigOperand.getNode())) + continue; + Matched = true; CombinesToApply.push_back(*Res); // All the inputs that are extended need to be folded, otherwise diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index 636e31c..bf9de0a 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -1583,7 +1583,10 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { if (!TII->isAddImmediate(*DeadMI, Reg)) continue; LIS->RemoveMachineInstrFromMaps(*DeadMI); + Register AddReg = DeadMI->getOperand(1).getReg(); DeadMI->eraseFromParent(); + if (AddReg.isVirtual()) + LIS->shrinkToUses(&LIS->getInterval(AddReg)); } } } @@ -1869,11 +1872,15 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const { // Loop over the dead AVL values, and delete them now. This has // to be outside the above loop to avoid invalidating iterators. for (auto *MI : ToDelete) { + assert(MI->getOpcode() == RISCV::ADDI); + Register AddReg = MI->getOperand(1).getReg(); if (LIS) { LIS->removeInterval(MI->getOperand(0).getReg()); LIS->RemoveMachineInstrFromMaps(*MI); } MI->eraseFromParent(); + if (LIS && AddReg.isVirtual()) + LIS->shrinkToUses(&LIS->getInterval(AddReg)); } } diff --git a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td index 24ebbc3..41071b2 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td @@ -654,8 +654,17 @@ foreach mx = SchedMxList in { foreach sew = SchedSEWSet<mx>.val in { defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxList>.c; - defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From", [SMX60_VIEU], mx, sew, IsWorstCase>; - defm "" : LMULSEWWriteResMXSEW<"WriteVIRedMinMaxV_From", [SMX60_VIEU], mx, sew, IsWorstCase>; + defvar VIRedLat = GetLMULValue<[5, 5, 5, 7, 11, 19, 35], mx>.c; + defvar VIRedOcc = GetLMULValue<[1, 1, 2, 2, 4, 10, 35], mx>.c; + let Latency = VIRedLat, ReleaseAtCycles = [VIRedOcc] in { + defm "" : LMULSEWWriteResMXSEW<"WriteVIRedMinMaxV_From", [SMX60_VIEU], mx, sew, IsWorstCase>; + + // Pattern for vredsum: 5/5/5/7/11/19/35 + // Pattern for vredand, vredor, vredxor: 4/4/4/6/10/18/34 + // They are grouped together, so we use the worst-case vredsum latency. + // TODO: split vredand, vredor, vredxor into separate scheduling classes.
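// [Editor's aside, not part of the patch] How I read the 7-entry
// tables used throughout these scheduling hunks (an assumption about
// GetLMULValue, not something this diff defines): one entry per LMUL,
// ordered MF8, MF4, MF2, M1, M2, M4, M8. A standalone C++ rendering of
// the vredsum latency row from above, as an example:
#include <array>
#include <cstdio>

int main() {
  const std::array<const char *, 7> LMUL = {"MF8", "MF4", "MF2", "M1",
                                            "M2",  "M4",  "M8"};
  const std::array<int, 7> VIRedLat = {5, 5, 5, 7, 11, 19, 35};
  for (size_t I = 0; I < LMUL.size(); ++I)
    std::printf("vredsum at LMUL=%-3s -> latency %d\n", LMUL[I], VIRedLat[I]);
}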
+ defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From", [SMX60_VIEU], mx, sew, IsWorstCase>; + } } } @@ -663,7 +672,27 @@ foreach mx = SchedMxListWRed in { foreach sew = SchedSEWSet<mx, 0, 1>.val in { defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListWRed>.c; - defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SMX60_VIEU], mx, sew, IsWorstCase>; + defvar VIRedLat = GetLMULValue<[5, 5, 5, 7, 11, 19, 35], mx>.c; + defvar VIRedOcc = GetLMULValue<[1, 1, 2, 2, 4, 10, 35], mx>.c; + let Latency = VIRedLat, ReleaseAtCycles = [VIRedOcc] in { + defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SMX60_VIEU], mx, sew, IsWorstCase>; + } + } +} + +foreach mx = SchedMxListF in { + foreach sew = SchedSEWSet<mx, 1>.val in { + defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c; + + // Latency for vfredmax.vs, vfredmin.vs: 12/12/15/21/33/57 + // Latency for vfredusum.vs is slightly lower for e16/e32 + // We use the worst-case + defvar VFRedLat = GetLMULValue<[12, 12, 12, 15, 21, 33, 57], mx>.c; + defvar VFRedOcc = GetLMULValue<[8, 8, 8, 8, 14, 20, 57], mx>.c; + let Latency = VFRedLat, ReleaseAtCycles = [VFRedOcc] in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [SMX60_VFP], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From", [SMX60_VFP], mx, sew, IsWorstCase>; + } } } @@ -671,9 +700,20 @@ foreach mx = SchedMxListF in { foreach sew = SchedSEWSet<mx, 1>.val in { defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c; - defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [SMX60_VFP], mx, sew, IsWorstCase>; - defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SMX60_VFP], mx, sew, IsWorstCase>; - defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From", [SMX60_VFP], mx, sew, IsWorstCase>; + // Compute latency based on SEW + defvar VFRedOV_FromLat = !cond( + !eq(sew, 16) : ConstValueUntilLMULThenDouble<"MF4", 12, mx>.c, + !eq(sew, 32) : ConstValueUntilLMULThenDouble<"MF2", 12, mx>.c, + !eq(sew, 64) : ConstValueUntilLMULThenDouble<"M1", 12, mx>.c + ); + defvar VFRedOV_FromOcc = !cond( + !eq(sew, 16) : GetLMULValue<[8, 8, 20, 24, 48, 96, 384], mx>.c, + !eq(sew, 32) : GetLMULValue<[8, 8, 8, 12, 24, 48, 192], mx>.c, + !eq(sew, 64) : GetLMULValue<[6, 6, 6, 6, 12, 24, 96], mx>.c + ); + let Latency = VFRedOV_FromLat, ReleaseAtCycles = [VFRedOV_FromOcc] in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SMX60_VFP], mx, sew, IsWorstCase>; + } } } @@ -681,8 +721,18 @@ foreach mx = SchedMxListFWRed in { foreach sew = SchedSEWSet<mx, 1, 1>.val in { defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListFWRed, 1>.c; - defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SMX60_VFP], mx, sew, IsWorstCase>; - defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SMX60_VFP], mx, sew, IsWorstCase>; + defvar VFRedOVLat = !cond( + !eq(sew, 16) : ConstValueUntilLMULThenDouble<"MF4", 16, mx>.c, + !eq(sew, 32) : ConstValueUntilLMULThenDouble<"MF2", 16, mx>.c, + ); + defvar VFRedOVOcc = !cond( + !eq(sew, 16) : GetLMULValue<[11, 11, 27, 32, 64, 128, 512], mx>.c, + !eq(sew, 32) : GetLMULValue<[11, 11, 11, 16, 32, 64, 256], mx>.c, + ); + let Latency = VFRedOVLat, ReleaseAtCycles = [VFRedOVOcc] in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SMX60_VFP], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SMX60_VFP], mx, sew, IsWorstCase>; + } } } diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp index 56a38bb..b2cbdb2 100644 --- 
a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp @@ -2390,6 +2390,15 @@ static bool generateBindlessImageINTELInst(const SPIRV::IncomingCall *Call, return buildBindlessImageINTELInst(Call, Opcode, MIRBuilder, GR); } +static bool generateBlockingPipesInst(const SPIRV::IncomingCall *Call, + MachineIRBuilder &MIRBuilder, + SPIRVGlobalRegistry *GR) { + const SPIRV::DemangledBuiltin *Builtin = Call->Builtin; + unsigned Opcode = + SPIRV::lookupNativeBuiltin(Builtin->Name, Builtin->Set)->Opcode; + return buildOpFromWrapper(MIRBuilder, Opcode, Call, Register(0)); +} + static bool generateTernaryBitwiseFunctionINTELInst(const SPIRV::IncomingCall *Call, MachineIRBuilder &MIRBuilder, @@ -3050,6 +3059,8 @@ std::optional<bool> lowerBuiltin(const StringRef DemangledCall, return generatePipeInst(Call.get(), MIRBuilder, GR); case SPIRV::PredicatedLoadStore: return generatePredicatedLoadStoreInst(Call.get(), MIRBuilder, GR); + case SPIRV::BlockingPipes: + return generateBlockingPipesInst(Call.get(), MIRBuilder, GR); } return false; } diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td index c259cce..492a98e 100644 --- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td +++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td @@ -71,6 +71,7 @@ def TernaryBitwiseINTEL : BuiltinGroup; def Block2DLoadStore : BuiltinGroup; def Pipe : BuiltinGroup; def PredicatedLoadStore : BuiltinGroup; +def BlockingPipes : BuiltinGroup; //===----------------------------------------------------------------------===// // Class defining a demangled builtin record. The information in the record @@ -1174,6 +1175,10 @@ defm : DemangledNativeBuiltin<"clock_read_sub_group", OpenCL_std, KernelClock, 0 defm : DemangledNativeBuiltin<"clock_read_hilo_device", OpenCL_std, KernelClock, 0, 0, OpReadClockKHR>; defm : DemangledNativeBuiltin<"clock_read_hilo_work_group", OpenCL_std, KernelClock, 0, 0, OpReadClockKHR>; defm : DemangledNativeBuiltin<"clock_read_hilo_sub_group", OpenCL_std, KernelClock, 0, 0, OpReadClockKHR>; + +//SPV_ALTERA_blocking_pipes +defm : DemangledNativeBuiltin<"__spirv_WritePipeBlockingINTEL", OpenCL_std, BlockingPipes, 0, 0, OpWritePipeBlockingALTERA>; +defm : DemangledNativeBuiltin<"__spirv_ReadPipeBlockingINTEL", OpenCL_std, BlockingPipes, 0, 0, OpReadPipeBlockingALTERA>; defm : DemangledNativeBuiltin<"__spirv_ReadClockKHR", OpenCL_std, KernelClock, 1, 1, OpReadClockKHR>; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp index 43b2869..f681b0d 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp @@ -159,7 +159,9 @@ static const std::map<std::string, SPIRV::Extension::Extension, std::less<>> {"SPV_KHR_maximal_reconvergence", SPIRV::Extension::Extension::SPV_KHR_maximal_reconvergence}, {"SPV_INTEL_kernel_attributes", - SPIRV::Extension::Extension::SPV_INTEL_kernel_attributes}}; + SPIRV::Extension::Extension::SPV_INTEL_kernel_attributes}, + {"SPV_ALTERA_blocking_pipes", + SPIRV::Extension::Extension::SPV_ALTERA_blocking_pipes}}; bool SPIRVExtensionsParser::parse(cl::Option &O, StringRef ArgName, StringRef ArgValue, diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td index a61351e..03bd61b 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td +++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td @@ -993,3 +993,9 @@ def OpPredicatedLoadINTEL: Op<6528, 
(outs ID:$res), (ins TYPE:$resType, ID:$ptr, "$res = OpPredicatedLoadINTEL $resType $ptr $predicate $default_value">; def OpPredicatedStoreINTEL: Op<6529, (outs), (ins ID:$ptr, ID:$object, ID:$predicate, variable_ops), "OpPredicatedStoreINTEL $ptr $object $predicate">; + +//SPV_ALTERA_blocking_pipes +def OpReadPipeBlockingALTERA :Op<5946, (outs), (ins ID:$pipe, ID:$pointer, ID:$packetSize, ID:$packetAlignment), + "OpReadPipeBlockingALTERA $pipe $pointer $packetSize $packetAlignment">; +def OpWritePipeBlockingALTERA :Op<5947, (outs), (ins ID:$pipe, ID:$pointer, ID:$packetSize, ID:$packetAlignment), + "OpWritePipeBlockingALTERA $pipe $pointer $packetSize $packetAlignment">; diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp index e5ac76c4..af76016 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp @@ -1885,6 +1885,13 @@ void addInstrRequirements(const MachineInstr &MI, Reqs.addCapability( SPIRV::Capability::CooperativeMatrixCheckedInstructionsINTEL); break; + case SPIRV::OpReadPipeBlockingALTERA: + case SPIRV::OpWritePipeBlockingALTERA: + if (ST.canUseExtension(SPIRV::Extension::SPV_ALTERA_blocking_pipes)) { + Reqs.addExtension(SPIRV::Extension::SPV_ALTERA_blocking_pipes); + Reqs.addCapability(SPIRV::Capability::BlockingPipesALTERA); + } + break; case SPIRV::OpCooperativeMatrixGetElementCoordINTEL: if (!ST.canUseExtension(SPIRV::Extension::SPV_INTEL_joint_matrix)) report_fatal_error("OpCooperativeMatrixGetElementCoordINTEL requires the " diff --git a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp index 4e4e6fb..be88f33 100644 --- a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp @@ -56,6 +56,13 @@ public: } }; +static cl::list<std::string> SPVAllowUnknownIntrinsics( + "spv-allow-unknown-intrinsics", cl::CommaSeparated, + cl::desc("Emit unknown intrinsics as calls to external functions. 
A " + "comma-separated input list of intrinsic prefixes must be " + "provided, and only intrinsics carrying a listed prefix get " + "emitted as described."), + cl::value_desc("intrinsic_prefix_0,intrinsic_prefix_1"), cl::ValueOptional); } // namespace char SPIRVPrepareFunctions::ID = 0; @@ -445,6 +452,15 @@ bool SPIRVPrepareFunctions::substituteIntrinsicCalls(Function *F) { EraseFromParent); Changed = true; break; + default: + if (TM.getTargetTriple().getVendor() == Triple::AMD || + any_of(SPVAllowUnknownIntrinsics, [II](auto &&Prefix) { + if (Prefix.empty()) + return false; + return II->getCalledFunction()->getName().starts_with(Prefix); + })) + Changed |= lowerIntrinsicToFunction(II); + break; } } } diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td index 1b4b29b..65a8885 100644 --- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td +++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td @@ -309,7 +309,7 @@ defm SPV_KHR_shader_clock : ExtensionOperand<54, [EnvVulkan, EnvOpenCL]>; defm SPV_INTEL_unstructured_loop_controls : ExtensionOperand<55, [EnvOpenCL]>; defm SPV_EXT_demote_to_helper_invocation : ExtensionOperand<56, [EnvVulkan]>; defm SPV_INTEL_fpga_reg : ExtensionOperand<57, [EnvOpenCL]>; -defm SPV_INTEL_blocking_pipes : ExtensionOperand<58, [EnvOpenCL]>; +defm SPV_ALTERA_blocking_pipes : ExtensionOperand<58, [EnvOpenCL]>; defm SPV_GOOGLE_user_type : ExtensionOperand<59, [EnvVulkan]>; defm SPV_KHR_physical_storage_buffer : ExtensionOperand<60, [EnvVulkan]>; defm SPV_INTEL_kernel_attributes : ExtensionOperand<61, [EnvOpenCL]>; @@ -611,6 +611,7 @@ defm TensorFloat32RoundingINTEL : CapabilityOperand<6425, 0, 0, [SPV_INTEL_tenso defm BFloat16TypeKHR : CapabilityOperand<5116, 0, 0, [SPV_KHR_bfloat16], []>; defm BFloat16DotProductKHR : CapabilityOperand<5117, 0, 0, [SPV_KHR_bfloat16], [BFloat16TypeKHR]>; defm BFloat16CooperativeMatrixKHR : CapabilityOperand<5118, 0, 0, [SPV_KHR_bfloat16], [BFloat16TypeKHR, CooperativeMatrixKHR]>; +defm BlockingPipesALTERA : CapabilityOperand<5945, 0, 0, [SPV_ALTERA_blocking_pipes], []>; //===----------------------------------------------------------------------===// // Multiclass used to define SourceLanguage enum values and at the same time diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 4d44227b3..d103953 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -53354,6 +53354,7 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG, // i32 sub value. 
static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { using namespace SDPatternMatch; SDValue StoredVal = St->getValue(); @@ -53442,7 +53443,8 @@ static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL, } SDValue NewStore = - DAG.getStore(St->getChain(), DL, Res, NewPtr, St->getPointerInfo(), + DAG.getStore(St->getChain(), DL, Res, NewPtr, + MachinePointerInfo(St->getPointerInfo().getAddrSpace()), Align(), St->getMemOperand()->getFlags()); // If there are other uses of StoredVal, replace with a new load of the @@ -53450,6 +53452,8 @@ static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL, if (!StoredVal.hasOneUse()) { SDValue NewLoad = DAG.getLoad(VT, DL, NewStore, Ld->getBasePtr(), Ld->getMemOperand()); + for (SDNode *User : StoredVal->users()) + DCI.AddToWorklist(User); DAG.ReplaceAllUsesWith(StoredVal, NewLoad); } return NewStore; @@ -53681,7 +53685,7 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, } } - if (SDValue R = narrowBitOpRMW(St, dl, DAG, Subtarget)) + if (SDValue R = narrowBitOpRMW(St, dl, DAG, DCI, Subtarget)) return R; // Convert store(cmov(load(p), x, CC), p) to cstore(x, p, CC) @@ -54639,7 +54643,8 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG, SDValue NewPtr = DAG.getMemBasePlusOffset( Ld->getBasePtr(), PtrByteOfs, DL, SDNodeFlags::NoUnsignedWrap); SDValue NewLoad = - DAG.getLoad(VT, DL, Ld->getChain(), NewPtr, Ld->getPointerInfo(), + DAG.getLoad(VT, DL, Ld->getChain(), NewPtr, + MachinePointerInfo(Ld->getPointerInfo().getAddrSpace()), Align(), Ld->getMemOperand()->getFlags()); DAG.makeEquivalentMemoryOrdering(Ld, NewLoad); return NewLoad; diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp index 975a271..96bef0e 100644 --- a/llvm/lib/TargetParser/TargetParser.cpp +++ b/llvm/lib/TargetParser/TargetParser.cpp @@ -174,8 +174,8 @@ constexpr GPUInfo AMDGCNGPUs[] = { {{"gfx1153"}, {"gfx1153"}, GK_GFX1153, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, {{"gfx1200"}, {"gfx1200"}, GK_GFX1200, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, {{"gfx1201"}, {"gfx1201"}, GK_GFX1201, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, - {{"gfx1250"}, {"gfx1250"}, GK_GFX1250, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32}, - {{"gfx1251"}, {"gfx1251"}, GK_GFX1251, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32}, + {{"gfx1250"}, {"gfx1250"}, GK_GFX1250, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK_ALWAYS}, + {{"gfx1251"}, {"gfx1251"}, GK_GFX1251, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK_ALWAYS}, {{"gfx9-generic"}, {"gfx9-generic"}, GK_GFX9_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, {{"gfx10-1-generic"}, {"gfx10-1-generic"}, GK_GFX10_1_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP}, diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index af53fa0..02f06be 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -734,7 +734,7 @@ void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC(); // Reserve bits 60-63 for other 
information purposes. - FunctionHash &= 0x0FFFFFFFFFFFFFFF; + FunctionHash &= NamedInstrProfRecord::FUNC_HASH_MASK; if (IsCS) NamedInstrProfRecord::setCSFlagInHash(FunctionHash); LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n" diff --git a/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp index 42b1fdf..8aa8aa2 100644 --- a/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -39,36 +39,36 @@ using namespace llvm; STATISTIC(NumBroken, "Number of blocks inserted"); namespace { - struct BreakCriticalEdges : public FunctionPass { - static char ID; // Pass identification, replacement for typeid - BreakCriticalEdges() : FunctionPass(ID) { - initializeBreakCriticalEdgesPass(*PassRegistry::getPassRegistry()); - } +struct BreakCriticalEdges : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + BreakCriticalEdges() : FunctionPass(ID) { + initializeBreakCriticalEdgesPass(*PassRegistry::getPassRegistry()); + } - bool runOnFunction(Function &F) override { - auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); - auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; + bool runOnFunction(Function &F) override { + auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; - auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>(); - auto *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr; + auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>(); + auto *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr; - auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); - auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; - unsigned N = - SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(DT, LI, nullptr, PDT)); - NumBroken += N; - return N > 0; - } + auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); + auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; + unsigned N = SplitAllCriticalEdges( + F, CriticalEdgeSplittingOptions(DT, LI, nullptr, PDT)); + NumBroken += N; + return N > 0; + } - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addPreserved<DominatorTreeWrapperPass>(); - AU.addPreserved<LoopInfoWrapperPass>(); + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addPreserved<LoopInfoWrapperPass>(); - // No loop canonicalization guarantees are broken by this pass. - AU.addPreservedID(LoopSimplifyID); - } - }; -} + // No loop canonicalization guarantees are broken by this pass. + AU.addPreservedID(LoopSimplifyID); + } +}; +} // namespace char BreakCriticalEdges::ID = 0; INITIALIZE_PASS(BreakCriticalEdges, "break-crit-edges", @@ -76,6 +76,7 @@ INITIALIZE_PASS(BreakCriticalEdges, "break-crit-edges", // Publicly exposed interface to pass... 
char &llvm::BreakCriticalEdgesID = BreakCriticalEdges::ID; + FunctionPass *llvm::createBreakCriticalEdgesPass() { return new BreakCriticalEdges(); } diff --git a/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp b/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp index 7343c79..9f6d89e 100644 --- a/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -40,22 +40,22 @@ using namespace llvm; namespace { - struct QuotRemPair { - Value *Quotient; - Value *Remainder; - - QuotRemPair(Value *InQuotient, Value *InRemainder) - : Quotient(InQuotient), Remainder(InRemainder) {} - }; - - /// A quotient and remainder, plus a BB from which they logically "originate". - /// If you use Quotient or Remainder in a Phi node, you should use BB as its - /// corresponding predecessor. - struct QuotRemWithBB { - BasicBlock *BB = nullptr; - Value *Quotient = nullptr; - Value *Remainder = nullptr; - }; +struct QuotRemPair { + Value *Quotient; + Value *Remainder; + + QuotRemPair(Value *InQuotient, Value *InRemainder) + : Quotient(InQuotient), Remainder(InRemainder) {} +}; + +/// A quotient and remainder, plus a BB from which they logically "originate". +/// If you use Quotient or Remainder in a Phi node, you should use BB as its +/// corresponding predecessor. +struct QuotRemWithBB { + BasicBlock *BB = nullptr; + Value *Quotient = nullptr; + Value *Remainder = nullptr; +}; using DivCacheTy = DenseMap<DivRemMapKey, QuotRemPair>; using BypassWidthsTy = DenseMap<unsigned, unsigned>; diff --git a/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/llvm/lib/Transforms/Utils/LoopSimplify.cpp index 61ffb49..8da6a980 100644 --- a/llvm/lib/Transforms/Utils/LoopSimplify.cpp +++ b/llvm/lib/Transforms/Utils/LoopSimplify.cpp @@ -378,7 +378,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, if (P != Preheader) BackedgeBlocks.push_back(P); } - // Create and insert the new backedge block... + // Create and insert the new backedge block. BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(), Header->getName() + ".backedge", F); BranchInst *BETerminator = BranchInst::Create(Header, BEBlock); @@ -737,39 +737,39 @@ bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, } namespace { - struct LoopSimplify : public FunctionPass { - static char ID; // Pass identification, replacement for typeid - LoopSimplify() : FunctionPass(ID) { - initializeLoopSimplifyPass(*PassRegistry::getPassRegistry()); - } +struct LoopSimplify : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + LoopSimplify() : FunctionPass(ID) { + initializeLoopSimplifyPass(*PassRegistry::getPassRegistry()); + } - bool runOnFunction(Function &F) override; + bool runOnFunction(Function &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<AssumptionCacheTracker>(); + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<AssumptionCacheTracker>(); - // We need loop information to identify the loops... - AU.addRequired<DominatorTreeWrapperPass>(); - AU.addPreserved<DominatorTreeWrapperPass>(); + // We need loop information to identify the loops. 
+ AU.addRequired<DominatorTreeWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); - AU.addRequired<LoopInfoWrapperPass>(); - AU.addPreserved<LoopInfoWrapperPass>(); + AU.addRequired<LoopInfoWrapperPass>(); + AU.addPreserved<LoopInfoWrapperPass>(); - AU.addPreserved<BasicAAWrapperPass>(); - AU.addPreserved<AAResultsWrapperPass>(); - AU.addPreserved<GlobalsAAWrapperPass>(); - AU.addPreserved<ScalarEvolutionWrapperPass>(); - AU.addPreserved<SCEVAAWrapperPass>(); - AU.addPreservedID(LCSSAID); - AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added. - AU.addPreserved<BranchProbabilityInfoWrapperPass>(); - AU.addPreserved<MemorySSAWrapperPass>(); - } + AU.addPreserved<BasicAAWrapperPass>(); + AU.addPreserved<AAResultsWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); + AU.addPreserved<ScalarEvolutionWrapperPass>(); + AU.addPreserved<SCEVAAWrapperPass>(); + AU.addPreservedID(LCSSAID); + AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added. + AU.addPreserved<BranchProbabilityInfoWrapperPass>(); + AU.addPreserved<MemorySSAWrapperPass>(); + } - /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees. - void verifyAnalysis() const override; - }; -} + /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees. + void verifyAnalysis() const override; +}; +} // namespace char LoopSimplify::ID = 0; INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify", @@ -780,12 +780,12 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_END(LoopSimplify, "loop-simplify", "Canonicalize natural loops", false, false) -// Publicly exposed interface to pass... +// Publicly exposed interface to pass. char &llvm::LoopSimplifyID = LoopSimplify::ID; Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); } /// runOnFunction - Run down all loops in the CFG (recursively, but we could do -/// it in any convenient order) inserting preheaders... +/// it in any convenient order) inserting preheaders. /// bool LoopSimplify::runOnFunction(Function &F) { bool Changed = false; diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 906fa2f..b7224a3 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7933,6 +7933,26 @@ void VPRecipeBuilder::collectScaledReductions(VFRange &Range) { (!Chain.ExtendB || ExtendIsOnlyUsedByPartialReductions(Chain.ExtendB))) ScaledReductionMap.try_emplace(Chain.Reduction, Pair.second); } + + // Check that all partial reductions in a chain are only used by other + // partial reductions with the same scale factor. Otherwise we end up creating + // users of scaled reductions where the types of the other operands don't + // match. 
+ for (const auto &[Chain, Scale] : PartialReductionChains) { + auto AllUsersPartialRdx = [ScaleVal = Scale, this](const User *U) { + auto *UI = cast<Instruction>(U); + if (isa<PHINode>(UI) && UI->getParent() == OrigLoop->getHeader()) { + return all_of(UI->users(), [ScaleVal, this](const User *U) { + auto *UI = cast<Instruction>(U); + return ScaledReductionMap.lookup_or(UI, 0) == ScaleVal; + }); + } + return ScaledReductionMap.lookup_or(UI, 0) == ScaleVal || + !OrigLoop->contains(UI->getParent()); + }; + if (!all_of(Chain.Reduction->users(), AllUsersPartialRdx)) + ScaledReductionMap.erase(Chain.Reduction); + } } bool VPRecipeBuilder::getScaledReductions( @@ -8116,11 +8136,8 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R, if (isa<LoadInst>(Instr) || isa<StoreInst>(Instr)) return tryToWidenMemory(Instr, Operands, Range); - if (std::optional<unsigned> ScaleFactor = getScalingForReduction(Instr)) { - if (auto PartialRed = - tryToCreatePartialReduction(Instr, Operands, ScaleFactor.value())) - return PartialRed; - } + if (std::optional<unsigned> ScaleFactor = getScalingForReduction(Instr)) + return tryToCreatePartialReduction(Instr, Operands, ScaleFactor.value()); if (!shouldWiden(Instr, Range)) return nullptr; @@ -8154,9 +8171,9 @@ VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction, isa<VPPartialReductionRecipe>(BinOpRecipe)) std::swap(BinOp, Accumulator); - if (ScaleFactor != - vputils::getVFScaleFactor(Accumulator->getDefiningRecipe())) - return nullptr; + assert(ScaleFactor == + vputils::getVFScaleFactor(Accumulator->getDefiningRecipe()) && + "all accumulators in chain must have same scale factor"); unsigned ReductionOpcode = Reduction->getOpcode(); if (ReductionOpcode == Instruction::Sub) { diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index bf3f52c..df835a0 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -20996,6 +20996,15 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP, return false; })) return std::nullopt; + if (S.areInstructionsWithCopyableElements() && EI && EI.UserTE->hasState() && + EI.UserTE->hasCopyableElements() && + EI.UserTE->getMainOp()->getParent() == S.getMainOp()->getParent() && + all_of(VL, [&](Value *V) { + if (S.isCopyableElement(V)) + return true; + return isUsedOutsideBlock(V); + })) + return std::nullopt; bool HasCopyables = S.areInstructionsWithCopyableElements(); if (((!HasCopyables && doesNotNeedToSchedule(VL)) || all_of(VL, [&](Value *V) { return S.isNonSchedulable(V); }))) { diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 8670822..3062e1c 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1163,10 +1163,10 @@ public: bool opcodeMayReadOrWriteFromMemory() const; /// Returns true if the recipe only uses the first lane of operand \p Op. - bool onlyFirstLaneUsed(const VPValue *Op) const override; + bool usesFirstLaneOnly(const VPValue *Op) const override; /// Returns true if the recipe only uses the first part of operand \p Op. - bool onlyFirstPartUsed(const VPValue *Op) const override; + bool usesFirstPartOnly(const VPValue *Op) const override; /// Returns true if this VPInstruction produces a scalar value from a vector, /// e.g. by performing a reduction or extracting a lane. 
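// [Editor's aside, not part of the patch] A standalone model of the
// chain check added in the LoopVectorize hunk above: a partial
// reduction with scale factor S narrows a VF-wide input to VF/S
// elements, so every in-loop user of its result must itself be a
// partial reduction with the same S, or operand types would no longer
// line up. The instruction ids and scales below are invented.
#include <cstdio>
#include <map>

int main() {
  // Models ScaledReductionMap.lookup_or(I, 0): 0 means "not scaled".
  std::map<int, unsigned> ScaledReductionMap = {{1, 4}, {2, 4}, {3, 2}};
  const int Chain = 1;        // candidate chain head, scale 4
  const int Users[] = {2, 3}; // its in-loop users
  const unsigned Scale = ScaledReductionMap[Chain];
  bool Keep = true;
  for (int U : Users)
    if (ScaledReductionMap.count(U) == 0 || ScaledReductionMap[U] != Scale)
      Keep = false; // user 3 has scale 2 -> drop the chain
  std::printf("%s\n", Keep ? "keep chain" : "erase chain");
}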
@@ -1393,13 +1393,13 @@ public: return true; } - bool onlyFirstPartUsed(const VPValue *Op) const override { + bool usesFirstPartOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); return true; } - bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); return true; @@ -1628,7 +1628,7 @@ public: VPSlotTracker &SlotTracker) const override; #endif - bool onlyFirstLaneUsed(const VPValue *Op) const override; + bool usesFirstLaneOnly(const VPValue *Op) const override; }; /// A recipe for widening Call instructions using library calls. @@ -1767,7 +1767,7 @@ struct LLVM_ABI_FOR_TEST VPWidenSelectRecipe : public VPRecipeWithIRFlags, } /// Returns true if the recipe only uses the first lane of operand \p Op. - bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); return Op == getCond() && isInvariantCond(); @@ -1833,7 +1833,7 @@ public: #endif /// Returns true if the recipe only uses the first lane of operand \p Op. - bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); if (Op == getOperand(0)) @@ -1870,7 +1870,7 @@ public: void execute(VPTransformState &State) override; - bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); return true; @@ -1884,7 +1884,7 @@ public: } /// Returns true if the recipe only uses the first part of operand \p Op. - bool onlyFirstPartUsed(const VPValue *Op) const override { + bool usesFirstPartOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); assert(getNumOperands() <= 2 && "must have at most two operands"); @@ -1922,14 +1922,14 @@ public: Type *getSourceElementType() const { return SourceElementTy; } - bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); return true; } /// Returns true if the recipe only uses the first part of operand \p Op. - bool onlyFirstPartUsed(const VPValue *Op) const override { + bool usesFirstPartOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); assert(getNumOperands() <= 2 && "must have at most two operands"); @@ -2110,7 +2110,7 @@ public: } /// Returns true if the recipe only uses the first lane of operand \p Op. - bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); // The recipe creates its own wide start value, so it only requests the @@ -2325,7 +2325,7 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe { #endif /// Returns true if the recipe only uses the first lane of operand \p Op. 
- bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); return Op == getStartValue(); @@ -2399,7 +2399,7 @@ public: bool isInLoop() const { return IsInLoop; } /// Returns true if the recipe only uses the first lane of operand \p Op. - bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); return isOrdered() || isInLoop(); @@ -2468,13 +2468,13 @@ public: #endif /// Returns true if the recipe only uses the first lane of operand \p Op. - bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); // Recursing through Blend recipes only, must terminate at header phi's the // latest. return all_of(users(), - [this](VPUser *U) { return U->onlyFirstLaneUsed(this); }); + [this](VPUser *U) { return U->usesFirstLaneOnly(this); }); } }; @@ -2562,7 +2562,7 @@ public: VPCostContext &Ctx) const override; /// Returns true if the recipe only uses the first lane of operand \p Op. - bool onlyFirstLaneUsed(const VPValue *Op) const override = 0; + bool usesFirstLaneOnly(const VPValue *Op) const override = 0; /// Returns the number of stored operands of this interleave group. Returns 0 /// for load interleave groups. @@ -2608,7 +2608,7 @@ public: VPSlotTracker &SlotTracker) const override; #endif - bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op); @@ -2656,7 +2656,7 @@ public: #endif /// The recipe only uses the first lane of the address, and EVL operand. - bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); return (Op == getAddr() && !llvm::is_contained(getStoredValues(), Op)) || @@ -2862,7 +2862,7 @@ public: VPValue *getEVL() const { return getOperand(2); } /// Returns true if the recipe only uses the first lane of operand \p Op. - bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); return Op == getEVL(); @@ -2924,7 +2924,7 @@ public: bool isPredicated() const { return IsPredicated; } /// Returns true if the recipe only uses the first lane of operand \p Op. 
- bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); return isSingleScalar(); @@ -3212,9 +3212,8 @@ protected: Alignment(getLoadStoreAlignment(&I)), Consecutive(Consecutive), Reverse(Reverse) { assert((Consecutive || !Reverse) && "Reverse implies consecutive"); - assert(isa<VPVectorEndPointerRecipe>(getAddr()) || - !Reverse && - "Reversed acccess without VPVectorEndPointerRecipe address?"); + assert((isa<VPVectorEndPointerRecipe>(getAddr()) || !Reverse) && + "Reversed access without VPVectorEndPointerRecipe address?"); } public: @@ -3300,7 +3299,7 @@ struct LLVM_ABI_FOR_TEST VPWidenLoadRecipe final : public VPWidenMemoryRecipe, #endif /// Returns true if the recipe only uses the first lane of operand \p Op. - bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); // Widened, consecutive load operations only demand the first lane of @@ -3341,7 +3340,7 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue { #endif /// Returns true if the recipe only uses the first lane of operand \p Op. - bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); // Widened loads only demand the first lane of EVL and consecutive loads @@ -3382,7 +3381,7 @@ struct LLVM_ABI_FOR_TEST VPWidenStoreRecipe final : public VPWidenMemoryRecipe { #endif /// Returns true if the recipe only uses the first lane of operand \p Op. - bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); // Widened, consecutive stores only demand the first lane of their address, @@ -3425,7 +3424,7 @@ struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe { #endif /// Returns true if the recipe only uses the first lane of operand \p Op. - bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); if (Op == getEVL()) { @@ -3509,14 +3508,14 @@ public: } /// Returns true if the recipe only uses the first lane of operand \p Op. - bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); return true; } /// Returns true if the recipe only uses the first part of operand \p Op. - bool onlyFirstPartUsed(const VPValue *Op) const override { + bool usesFirstPartOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); return true; @@ -3591,7 +3590,7 @@ public: } /// Returns true if the recipe only uses the first lane of operand \p Op. - bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); return true; @@ -3701,7 +3700,7 @@ public: VPValue *getStepValue() const { return getOperand(2); } /// Returns true if the recipe only uses the first lane of operand \p Op. 
- bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); return true; @@ -3766,7 +3765,7 @@ public: VPValue *getStepValue() const { return getOperand(1); } /// Returns true if the recipe only uses the first lane of operand \p Op. - bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); return true; diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index f792d0a..80cd112 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1276,7 +1276,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const { } } -bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { +bool VPInstruction::usesFirstLaneOnly(const VPValue *Op) const { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); if (Instruction::isBinaryOp(getOpcode()) || Instruction::isCast(getOpcode())) return vputils::onlyFirstLaneUsed(this); @@ -1325,7 +1325,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { llvm_unreachable("switch should return"); } -bool VPInstruction::onlyFirstPartUsed(const VPValue *Op) const { +bool VPInstruction::usesFirstPartOnly(const VPValue *Op) const { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); if (Instruction::isBinaryOp(getOpcode())) return vputils::onlyFirstPartUsed(this); @@ -1692,7 +1692,7 @@ void VPWidenCallRecipe::execute(VPTransformState &State) { if (!VFTy->getParamType(I.index())->isVectorTy()) Arg = State.get(I.value(), VPLane(0)); else - Arg = State.get(I.value(), onlyFirstLaneUsed(I.value())); + Arg = State.get(I.value(), usesFirstLaneOnly(I.value())); Args.push_back(Arg); } @@ -1761,7 +1761,7 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) { State.TTI)) Arg = State.get(I.value(), VPLane(0)); else - Arg = State.get(I.value(), onlyFirstLaneUsed(I.value())); + Arg = State.get(I.value(), usesFirstLaneOnly(I.value())); if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index(), State.TTI)) TysForDecl.push_back(Arg->getType()); @@ -1843,7 +1843,7 @@ StringRef VPWidenIntrinsicRecipe::getIntrinsicName() const { return Intrinsic::getBaseName(VectorIntrinsicID); } -bool VPWidenIntrinsicRecipe::onlyFirstLaneUsed(const VPValue *Op) const { +bool VPWidenIntrinsicRecipe::usesFirstLaneOnly(const VPValue *Op) const { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); return all_of(enumerate(operands()), [this, &Op](const auto &X) { auto [Idx, V] = X; diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 82bf79e..48bd697 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -204,7 +204,7 @@ static bool sinkScalarOperands(VPlan &Plan) { return cast<VPRecipeBase>(U)->getParent() != SinkTo; }); if (any_of(UsersOutsideSinkTo, [SinkCandidate](VPUser *U) { - return !U->onlyFirstLaneUsed(SinkCandidate); + return !U->usesFirstLaneOnly(SinkCandidate); })) continue; bool NeedsDuplicating = !UsersOutsideSinkTo.empty(); diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp index 
d6a0028..d4b8b72b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp @@ -582,7 +582,7 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) { /// Users that only demand the first lane can use the definition for lane /// 0. DefR->replaceUsesWithIf(LaneDefs[0], [DefR](VPUser &U, unsigned) { - return U.onlyFirstLaneUsed(DefR); + return U.usesFirstLaneOnly(DefR); }); // Update each build vector user that currently has DefR as its only diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp index c6380d3..e22c5df 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp @@ -18,12 +18,12 @@ using namespace llvm::VPlanPatternMatch; bool vputils::onlyFirstLaneUsed(const VPValue *Def) { return all_of(Def->users(), - [Def](const VPUser *U) { return U->onlyFirstLaneUsed(Def); }); + [Def](const VPUser *U) { return U->usesFirstLaneOnly(Def); }); } bool vputils::onlyFirstPartUsed(const VPValue *Def) { return all_of(Def->users(), - [Def](const VPUser *U) { return U->onlyFirstPartUsed(Def); }); + [Def](const VPUser *U) { return U->usesFirstPartOnly(Def); }); } bool vputils::onlyScalarValuesUsed(const VPValue *Def) { diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index 83e3fca..5da7463 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -274,12 +274,12 @@ public: virtual bool usesScalars(const VPValue *Op) const { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); - return onlyFirstLaneUsed(Op); + return usesFirstLaneOnly(Op); } /// Returns true if the VPUser only uses the first lane of operand \p Op. /// Conservatively returns false. - virtual bool onlyFirstLaneUsed(const VPValue *Op) const { + virtual bool usesFirstLaneOnly(const VPValue *Op) const { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); return false; @@ -287,7 +287,7 @@ public: /// Returns true if the VPUser only uses the first part of operand \p Op. /// Conservatively returns false. - virtual bool onlyFirstPartUsed(const VPValue *Op) const { + virtual bool usesFirstPartOnly(const VPValue *Op) const { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); return false;
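// [Editor's aside, not part of the patch] The rename applied across
// these VPlan files is mechanical (onlyFirstLaneUsed ->
// usesFirstLaneOnly, onlyFirstPartUsed -> usesFirstPartOnly), but the
// underlying query is worth spelling out: a definition can stay scalar
// when every user demands only lane 0, which is what
// vputils::onlyFirstLaneUsed computes by asking each user in turn. A
// standalone model with invented stand-in types:
#include <cstdio>
#include <vector>

struct UserModel {
  bool FirstLaneOnly = false; // conservative default, as in VPUser
  bool usesFirstLaneOnly() const { return FirstLaneOnly; }
};

// Mirrors vputils::onlyFirstLaneUsed: all users must agree on lane 0.
static bool onlyFirstLaneUsed(const std::vector<UserModel> &Users) {
  for (const UserModel &U : Users)
    if (!U.usesFirstLaneOnly())
      return false;
  return true;
}

int main() {
  std::vector<UserModel> Users(2, UserModel{true});
  std::printf("%s\n", onlyFirstLaneUsed(Users) ? "lane 0 only" : "all lanes");
}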
