Diffstat (limited to 'llvm/lib')
46 files changed, 539 insertions, 237 deletions
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp index 11d8294..e45d1f7 100644 --- a/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -1587,6 +1587,15 @@ static const SCEV *minusSCEVNoSignedOverflow(const SCEV *A, const SCEV *B, return nullptr; } +/// Returns \p A * \p B if it guaranteed not to signed wrap. Otherwise returns +/// nullptr. \p A and \p B must have the same integer type. +static const SCEV *mulSCEVNoSignedOverflow(const SCEV *A, const SCEV *B, + ScalarEvolution &SE) { + if (SE.willNotOverflow(Instruction::Mul, /*Signed=*/true, A, B)) + return SE.getMulExpr(A, B); + return nullptr; +} + /// Returns the absolute value of \p A. In the context of dependence analysis, /// we need an absolute value in a mathematical sense. If \p A is the signed /// minimum value, we cannot represent it unless extending the original type. @@ -1686,7 +1695,11 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst, assert(0 < Level && Level <= CommonLevels && "level out of range"); Level--; - const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); + const SCEV *Delta = minusSCEVNoSignedOverflow(SrcConst, DstConst, *SE); + if (!Delta) { + Result.Consistent = false; + return false; + } LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta); LLVM_DEBUG(dbgs() << ", " << *Delta->getType() << "\n"); @@ -1702,7 +1715,9 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst, const SCEV *AbsCoeff = absSCEVNoSignedOverflow(Coeff, *SE); if (!AbsDelta || !AbsCoeff) return false; - const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff); + const SCEV *Product = mulSCEVNoSignedOverflow(UpperBound, AbsCoeff, *SE); + if (!Product) + return false; return isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product); }(); if (IsDeltaLarge) { diff --git a/llvm/lib/Analysis/RegionPrinter.cpp b/llvm/lib/Analysis/RegionPrinter.cpp index a83af4e..33e073b 100644 --- a/llvm/lib/Analysis/RegionPrinter.cpp +++ b/llvm/lib/Analysis/RegionPrinter.cpp @@ -29,10 +29,9 @@ onlySimpleRegions("only-simple-regions", cl::Hidden, cl::init(false)); -namespace llvm { - -std::string DOTGraphTraits<RegionNode *>::getNodeLabel(RegionNode *Node, - RegionNode *Graph) { +std::string +llvm::DOTGraphTraits<RegionNode *>::getNodeLabel(RegionNode *Node, + RegionNode *Graph) { if (!Node->isSubRegion()) { BasicBlock *BB = Node->getNodeAs<BasicBlock>(); @@ -46,7 +45,8 @@ std::string DOTGraphTraits<RegionNode *>::getNodeLabel(RegionNode *Node, } template <> -struct DOTGraphTraits<RegionInfo *> : public DOTGraphTraits<RegionNode *> { +struct llvm::DOTGraphTraits<RegionInfo *> + : public llvm::DOTGraphTraits<RegionNode *> { DOTGraphTraits (bool isSimple = false) : DOTGraphTraits<RegionNode*>(isSimple) {} @@ -125,7 +125,6 @@ struct DOTGraphTraits<RegionInfo *> : public DOTGraphTraits<RegionNode *> { printRegionCluster(*G->getTopLevelRegion(), GW, 4); } }; -} // end namespace llvm namespace { diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 713277d..3aa245b 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -2087,6 +2087,17 @@ void AsmPrinter::emitFunctionBody() { // This is only used to influence register allocation behavior, no // actual initialization is needed. break; + case TargetOpcode::RELOC_NONE: { + // Generate a temporary label for the current PC. 
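// The sequence below emits, schematically (assembly shape
// illustrative, with <sym> taken from the RELOC_NONE operand):
//   .Lreloc_none0:
//   .reloc .Lreloc_none0, BFD_RELOC_NONE, <sym>
// i.e. a zero-size relocation at the current PC that records a
// reference to <sym> without emitting any instruction bytes.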
+ MCSymbol *Sym = OutContext.createTempSymbol("reloc_none"); + OutStreamer->emitLabel(Sym); + const MCExpr *Dot = MCSymbolRefExpr::create(Sym, OutContext); + const MCExpr *Value = MCSymbolRefExpr::create( + OutContext.getOrCreateSymbol(MI.getOperand(0).getSymbolName()), + OutContext); + OutStreamer->emitRelocDirective(*Dot, "BFD_RELOC_NONE", Value, SMLoc()); + break; + } default: emitInstruction(&MI); diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index b3c3125..7be7468 100644 --- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -292,7 +292,8 @@ void CallLowering::splitToValueTypes(const ArgInfo &OrigArg, LLVMContext &Ctx = OrigArg.Ty->getContext(); SmallVector<EVT, 4> SplitVTs; - ComputeValueVTs(*TLI, DL, OrigArg.Ty, SplitVTs, Offsets, 0); + ComputeValueVTs(*TLI, DL, OrigArg.Ty, SplitVTs, /*MemVTs=*/nullptr, Offsets, + 0); if (SplitVTs.size() == 0) return; @@ -996,7 +997,7 @@ void CallLowering::insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy, SmallVector<EVT, 4> SplitVTs; SmallVector<uint64_t, 4> Offsets; - ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets, 0); + ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, /*MemVTs=*/nullptr, &Offsets, 0); assert(VRegs.size() == SplitVTs.size()); @@ -1028,7 +1029,7 @@ void CallLowering::insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy, SmallVector<EVT, 4> SplitVTs; SmallVector<uint64_t, 4> Offsets; - ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets, 0); + ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, /*MemVTs=*/nullptr, &Offsets, 0); assert(VRegs.size() == SplitVTs.size()); diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index be1b51f..4f6a19f 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -2686,6 +2686,13 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, case Intrinsic::experimental_convergence_entry: case Intrinsic::experimental_convergence_loop: return translateConvergenceControlIntrinsic(CI, ID, MIRBuilder); + case Intrinsic::reloc_none: { + Metadata *MD = cast<MetadataAsValue>(CI.getArgOperand(0))->getMetadata(); + StringRef SymbolName = cast<MDString>(MD)->getString(); + MIRBuilder.buildInstr(TargetOpcode::RELOC_NONE) + .addExternalSymbol(SymbolName.data()); + return true; + } } return false; } diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 4795d81..434a579 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -1161,6 +1161,8 @@ bool MIParser::parse(MachineInstr *&MI) { MemOperands.push_back(MemOp); if (Token.isNewlineOrEOF()) break; + if (OpCode == TargetOpcode::BUNDLE && Token.is(MIToken::lbrace)) + break; if (Token.isNot(MIToken::comma)) return error("expected ',' before the next machine memory operand"); lex(); diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index 8ad9245..37e5c51 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -1547,10 +1547,14 @@ bool MachineInstr::mayAlias(BatchAAResults *AA, const MachineInstr &Other, // Check each pair of memory operands from both instructions, which can't // alias only if all pairs won't alias. 
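// The early `continue` added in the loop below rests on a standard
// dependence fact: two reads never need ordering, so an alias query is
// only warranted when at least one operand of the pair writes. A
// standalone sketch of that predicate (helper name illustrative):
static bool pairNeedsAliasCheck(const llvm::MachineMemOperand *A,
                                const llvm::MachineMemOperand *B) {
  // Store/store, store/load, and load/store pairs can conflict;
  // load/load pairs cannot.
  return A->isStore() || B->isStore();
}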
- for (auto *MMOa : memoperands()) - for (auto *MMOb : Other.memoperands()) + for (auto *MMOa : memoperands()) { + for (auto *MMOb : Other.memoperands()) { + if (!MMOa->isStore() && !MMOb->isStore()) + continue; if (MemOperandsHaveAlias(MFI, AA, UseTBAA, MMOa, MMOb)) return true; + } + } return false; } diff --git a/llvm/lib/CodeGen/MachineInstrBundle.cpp b/llvm/lib/CodeGen/MachineInstrBundle.cpp index f4c1a8b..fa654f2 100644 --- a/llvm/lib/CodeGen/MachineInstrBundle.cpp +++ b/llvm/lib/CodeGen/MachineInstrBundle.cpp @@ -143,6 +143,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, SmallSet<Register, 8> KilledUseSet; SmallSet<Register, 8> UndefUseSet; SmallVector<std::pair<Register, Register>> TiedOperands; + SmallVector<MachineInstr *> MemMIs; for (auto MII = FirstMI; MII != LastMI; ++MII) { // Debug instructions have no effects to track. if (MII->isDebugInstr()) @@ -206,6 +207,9 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, MIB.setMIFlag(MachineInstr::FrameSetup); if (MII->getFlag(MachineInstr::FrameDestroy)) MIB.setMIFlag(MachineInstr::FrameDestroy); + + if (MII->mayLoadOrStore()) + MemMIs.push_back(&*MII); } for (Register Reg : LocalDefs) { @@ -231,6 +235,8 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, assert(UseIdx < ExternUses.size()); MIB->tieOperands(DefIdx, LocalDefs.size() + UseIdx); } + + MIB->cloneMergedMemRefs(MF, MemMIs); } /// finalizeBundle - Same functionality as the previous finalizeBundle except diff --git a/llvm/lib/CodeGen/SafeStack.cpp b/llvm/lib/CodeGen/SafeStack.cpp index e9ffa85..6b747f3 100644 --- a/llvm/lib/CodeGen/SafeStack.cpp +++ b/llvm/lib/CodeGen/SafeStack.cpp @@ -196,8 +196,6 @@ public: bool run(); }; -constexpr Align SafeStack::StackAlignment; - uint64_t SafeStack::getStaticAllocaAllocationSize(const AllocaInst* AI) { uint64_t Size = DL.getTypeAllocSize(AI->getAllocatedType()); if (AI->isArrayAllocation()) { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 9961c98..2f598b2 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4758,7 +4758,7 @@ void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) { SmallVector<uint64_t, 4> Offsets; const Value *SrcV = I.getOperand(0); ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), - SrcV->getType(), ValueVTs, &Offsets, 0); + SrcV->getType(), ValueVTs, /*MemVTs=*/nullptr, &Offsets, 0); assert(ValueVTs.size() == 1 && Offsets[0] == 0 && "expect a single EVT for swifterror"); @@ -4794,7 +4794,7 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) { SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Ty, - ValueVTs, &Offsets, 0); + ValueVTs, /*MemVTs=*/nullptr, &Offsets, 0); assert(ValueVTs.size() == 1 && Offsets[0] == 0 && "expect a single EVT for swifterror"); @@ -7811,6 +7811,17 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, return; } + case Intrinsic::reloc_none: { + Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(0))->getMetadata(); + StringRef SymbolName = cast<MDString>(MD)->getString(); + SDValue Ops[2] = { + getRoot(), + DAG.getTargetExternalSymbol( + SymbolName.data(), TLI.getProgramPointerTy(DAG.getDataLayout()))}; + DAG.setRoot(DAG.getNode(ISD::RELOC_NONE, sdl, MVT::Other, Ops)); + return; + } + case Intrinsic::eh_exceptionpointer: case Intrinsic::eh_exceptioncode: { // Get the exception 
pointer vreg, copy from it, and resize it to fit. diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 77377d3..d3e1628 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -472,6 +472,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::LIFETIME_END: return "lifetime.end"; case ISD::FAKE_USE: return "fake_use"; + case ISD::RELOC_NONE: + return "reloc_none"; case ISD::PSEUDO_PROBE: return "pseudoprobe"; case ISD::GC_TRANSITION_START: return "gc_transition.start"; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 6c11c5b..8bc5d2f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -2550,6 +2550,11 @@ void SelectionDAGISel::Select_FAKE_USE(SDNode *N) { N->getOperand(1), N->getOperand(0)); } +void SelectionDAGISel::Select_RELOC_NONE(SDNode *N) { + CurDAG->SelectNodeTo(N, TargetOpcode::RELOC_NONE, N->getValueType(0), + N->getOperand(1), N->getOperand(0)); +} + void SelectionDAGISel::Select_FREEZE(SDNode *N) { // TODO: We don't have FREEZE pseudo-instruction in MachineInstr-level now. // If FREEZE instruction is added later, the code below must be changed as @@ -3325,6 +3330,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, case ISD::FAKE_USE: Select_FAKE_USE(NodeToMatch); return; + case ISD::RELOC_NONE: + Select_RELOC_NONE(NodeToMatch); + return; case ISD::FREEZE: Select_FREEZE(NodeToMatch); return; diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index 414e414..b99e1c7 100644 --- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1665,6 +1665,17 @@ void TwoAddressInstructionImpl::processTiedPairs(MachineInstr *MI, // by SubRegB is compatible with RegA with no subregister. So regardless of // whether the dest oper writes a subreg, the source oper should not. MO.setSubReg(0); + + // Update uses of RegB to uses of RegA inside the bundle. + if (MI->isBundle()) { + for (MachineOperand &MO : mi_bundle_ops(*MI)) { + if (MO.isReg() && MO.getReg() == RegB) { + assert(MO.getSubReg() == 0 && SubRegB == 0 && + "tied subregister uses in bundled instructions not supported"); + MO.setReg(RegA); + } + } + } } if (AllUsesCopied) { diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp index fafc325..a98e925 100644 --- a/llvm/lib/IR/DebugInfoMetadata.cpp +++ b/llvm/lib/IR/DebugInfoMetadata.cpp @@ -962,16 +962,29 @@ DIType *DIDerivedType::getClassType() const { assert(getTag() == dwarf::DW_TAG_ptr_to_member_type); return cast_or_null<DIType>(getExtraData()); } + +// Helper function to extract ConstantAsMetadata from ExtraData, +// handling extra data MDTuple unwrapping if needed. 
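// For example, both encodings below resolve to the same constant once
// the optional one-element tuple is unwrapped (Ctx and CI are assumed
// to be an existing LLVMContext and ConstantInt):
//   Metadata *Direct  = ConstantAsMetadata::get(CI);
//   Metadata *Wrapped = MDTuple::get(Ctx, {ConstantAsMetadata::get(CI)});
// The helper returns the ConstantAsMetadata in both cases and nullptr
// for tuples of any other arity.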
+static ConstantAsMetadata *extractConstantMetadata(Metadata *ExtraData) { + Metadata *ED = ExtraData; + if (auto *Tuple = dyn_cast_or_null<MDTuple>(ED)) { + if (Tuple->getNumOperands() != 1) + return nullptr; + ED = Tuple->getOperand(0); + } + return cast_or_null<ConstantAsMetadata>(ED); +} + uint32_t DIDerivedType::getVBPtrOffset() const { assert(getTag() == dwarf::DW_TAG_inheritance); - if (auto *CM = cast_or_null<ConstantAsMetadata>(getExtraData())) + if (auto *CM = extractConstantMetadata(getExtraData())) if (auto *CI = dyn_cast_or_null<ConstantInt>(CM->getValue())) return static_cast<uint32_t>(CI->getZExtValue()); return 0; } Constant *DIDerivedType::getStorageOffsetInBits() const { assert(getTag() == dwarf::DW_TAG_member && isBitField()); - if (auto *C = cast_or_null<ConstantAsMetadata>(getExtraData())) + if (auto *C = extractConstantMetadata(getExtraData())) return C->getValue(); return nullptr; } @@ -980,13 +993,13 @@ Constant *DIDerivedType::getConstant() const { assert((getTag() == dwarf::DW_TAG_member || getTag() == dwarf::DW_TAG_variable) && isStaticMember()); - if (auto *C = cast_or_null<ConstantAsMetadata>(getExtraData())) + if (auto *C = extractConstantMetadata(getExtraData())) return C->getValue(); return nullptr; } Constant *DIDerivedType::getDiscriminantValue() const { assert(getTag() == dwarf::DW_TAG_member && !isStaticMember()); - if (auto *C = cast_or_null<ConstantAsMetadata>(getExtraData())) + if (auto *C = extractConstantMetadata(getExtraData())) return C->getValue(); return nullptr; } diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 24f90bf..f1e473a 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -6013,6 +6013,12 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { Check(cast<ConstantInt>(Call.getArgOperand(3))->getZExtValue() < 2, "cache type argument to llvm.prefetch must be 0-1", Call); break; + case Intrinsic::reloc_none: { + Check(isa<MDString>( + cast<MetadataAsValue>(Call.getArgOperand(0))->getMetadata()), + "llvm.reloc.none argument must be a metadata string", &Call); + break; + } case Intrinsic::stackprotector: Check(isa<AllocaInst>(Call.getArgOperand(1)->stripPointerCasts()), "llvm.stackprotector parameter #2 must resolve to an alloca.", Call); diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index 0208735..5498787 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -1690,7 +1690,7 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) { IndexedInstrProf::ProfVersion::CurrentVersion) return make_error<InstrProfError>(instrprof_error::unsupported_version); - static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version12, + static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version13, "Please update the reader as needed when a new field is added " "or when indexed profile version gets bumped."); @@ -1723,10 +1723,11 @@ size_t Header::size() const { // of the header, and byte offset of existing fields shouldn't change when // indexed profile version gets incremented. 
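// The switch that follows encodes the layout rule stated above: a
// version bump that adds no header fields shares the case of the last
// version whose layout it matches. Reduced sketch of the pattern:
//   switch (Version) {
//   case 13ull: // Version13 added no new header fields
//   case 12ull:
//     return 72;
//   // ... older layouts ...
//   }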
static_assert( - IndexedInstrProf::ProfVersion::CurrentVersion == Version12, + IndexedInstrProf::ProfVersion::CurrentVersion == Version13, "Please update the size computation below if a new field has " "been added to the header; for a version bump without new " "fields, add a case statement to fall through to the latest version."); + case 13ull: case 12ull: return 72; case 11ull: diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index a347351..0f15ca8 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -542,7 +542,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { // The WritePrevVersion handling will either need to be removed or updated // if the version is advanced beyond 12. static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == - IndexedInstrProf::ProfVersion::Version12); + IndexedInstrProf::ProfVersion::Version13); if (static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation)) Header.Version |= VARIANT_MASK_IR_PROF; if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive)) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 1b559a6..f5081a9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -1248,7 +1248,8 @@ void AMDGPUTargetLowering::analyzeFormalArgumentsCompute( SmallVector<EVT, 16> ValueVTs; SmallVector<uint64_t, 16> Offsets; - ComputeValueVTs(*this, DL, BaseArgTy, ValueVTs, &Offsets, ArgOffset); + ComputeValueVTs(*this, DL, BaseArgTy, ValueVTs, /*MemVTs=*/nullptr, + &Offsets, ArgOffset); for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; ++Value) { diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 9460145..6ce18ea 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3917,6 +3917,9 @@ bool SIInstrInfo::areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, if (isLDSDMA(MIa) || isLDSDMA(MIb)) return false; + if (MIa.isBundle() || MIb.isBundle()) + return false; + // TODO: Should we check the address space from the MachineMemOperand? 
That // would allow us to distinguish objects we know don't alias based on the // underlying address space, even if it was lowered to a different one, diff --git a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp index 8c7bc2f..81303fa 100644 --- a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp +++ b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp @@ -97,7 +97,6 @@ #define DEBUG_TYPE "bpf-abstract-member-access" namespace llvm { -constexpr StringRef BPFCoreSharedInfo::AmaAttr; uint32_t BPFCoreSharedInfo::SeqNum; Instruction *BPFCoreSharedInfo::insertPassThrough(Module *M, BasicBlock *BB, diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp index 6e5520c..3c61216 100644 --- a/llvm/lib/Target/BPF/BPFISelLowering.cpp +++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp @@ -803,26 +803,6 @@ SDValue BPFTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { return getAddr(N, DAG); } -const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const { - switch ((BPFISD::NodeType)Opcode) { - case BPFISD::FIRST_NUMBER: - break; - case BPFISD::RET_GLUE: - return "BPFISD::RET_GLUE"; - case BPFISD::CALL: - return "BPFISD::CALL"; - case BPFISD::SELECT_CC: - return "BPFISD::SELECT_CC"; - case BPFISD::BR_CC: - return "BPFISD::BR_CC"; - case BPFISD::Wrapper: - return "BPFISD::Wrapper"; - case BPFISD::MEMCPY: - return "BPFISD::MEMCPY"; - } - return nullptr; -} - static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags) { return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(), diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h index 5243d49..3d6e7c7 100644 --- a/llvm/lib/Target/BPF/BPFISelLowering.h +++ b/llvm/lib/Target/BPF/BPFISelLowering.h @@ -20,17 +20,6 @@ namespace llvm { class BPFSubtarget; -namespace BPFISD { -enum NodeType : unsigned { - FIRST_NUMBER = ISD::BUILTIN_OP_END, - RET_GLUE, - CALL, - SELECT_CC, - BR_CC, - Wrapper, - MEMCPY -}; -} class BPFTargetLowering : public TargetLowering { public: @@ -39,9 +28,6 @@ public: // Provide custom lowering hooks for some operations. SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; - // This method returns the name of a target specific DAG node. - const char *getTargetNodeName(unsigned Opcode) const override; - // This method decides whether folding a constant offset // with the given GlobalAddress is legal. 
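// (The BPFISD enum and getTargetNodeName removed above are not lost:
// they are now generated by the -gen-sd-node-info TableGen backend
// into BPFGenSDNodeInfo.inc, which SelectionDAGGenTargetInfo consumes
// to report node names; see the BPFSelectionDAGInfo changes below.)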
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td index 51c32b2..bdacf9c 100644 --- a/llvm/lib/Target/BPF/BPFInstrInfo.td +++ b/llvm/lib/Target/BPF/BPFInstrInfo.td @@ -41,14 +41,12 @@ def BPFcallseq_start: SDNode<"ISD::CALLSEQ_START", SDT_BPFCallSeqStart, [SDNPHasChain, SDNPOutGlue]>; def BPFcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_BPFCallSeqEnd, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; -def BPFbrcc : SDNode<"BPFISD::BR_CC", SDT_BPFBrCC, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue]>; +def BPFbrcc : SDNode<"BPFISD::BR_CC", SDT_BPFBrCC, [SDNPHasChain]>; def BPFselectcc : SDNode<"BPFISD::SELECT_CC", SDT_BPFSelectCC>; def BPFWrapper : SDNode<"BPFISD::Wrapper", SDT_BPFWrapper>; def BPFmemcpy : SDNode<"BPFISD::MEMCPY", SDT_BPFMEMCPY, - [SDNPHasChain, SDNPInGlue, SDNPOutGlue, - SDNPMayStore, SDNPMayLoad]>; + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; def BPFIsLittleEndian : Predicate<"Subtarget->isLittleEndian()">; def BPFIsBigEndian : Predicate<"!Subtarget->isLittleEndian()">; def BPFHasALU32 : Predicate<"Subtarget->getHasAlu32()">; diff --git a/llvm/lib/Target/BPF/BPFPreserveDIType.cpp b/llvm/lib/Target/BPF/BPFPreserveDIType.cpp index d3b0c02..6a11ea6 100644 --- a/llvm/lib/Target/BPF/BPFPreserveDIType.cpp +++ b/llvm/lib/Target/BPF/BPFPreserveDIType.cpp @@ -27,10 +27,6 @@ #define DEBUG_TYPE "bpf-preserve-di-type" -namespace llvm { -constexpr StringRef BPFCoreSharedInfo::TypeIdAttr; -} // namespace llvm - using namespace llvm; namespace { diff --git a/llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp b/llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp index 3e29e6c..0e6d35d 100644 --- a/llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp +++ b/llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp @@ -10,12 +10,20 @@ // //===----------------------------------------------------------------------===// +#include "BPFSelectionDAGInfo.h" #include "BPFTargetMachine.h" #include "llvm/CodeGen/SelectionDAG.h" + +#define GET_SDNODE_DESC +#include "BPFGenSDNodeInfo.inc" + using namespace llvm; #define DEBUG_TYPE "bpf-selectiondag-info" +BPFSelectionDAGInfo::BPFSelectionDAGInfo() + : SelectionDAGGenTargetInfo(BPFGenSDNodeInfo) {} + SDValue BPFSelectionDAGInfo::EmitTargetCodeForMemcpy( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, @@ -31,11 +39,7 @@ SDValue BPFSelectionDAGInfo::EmitTargetCodeForMemcpy( if (StoresNumEstimate > getCommonMaxStoresPerMemFunc()) return SDValue(); - SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); - - Dst = DAG.getNode(BPFISD::MEMCPY, dl, VTs, Chain, Dst, Src, - DAG.getConstant(CopyLen, dl, MVT::i64), - DAG.getConstant(Alignment.value(), dl, MVT::i64)); - - return Dst.getValue(0); + return DAG.getNode(BPFISD::MEMCPY, dl, MVT::Other, Chain, Dst, Src, + DAG.getConstant(CopyLen, dl, MVT::i64), + DAG.getConstant(Alignment.value(), dl, MVT::i64)); } diff --git a/llvm/lib/Target/BPF/BPFSelectionDAGInfo.h b/llvm/lib/Target/BPF/BPFSelectionDAGInfo.h index 79f05e5..7345d2d 100644 --- a/llvm/lib/Target/BPF/BPFSelectionDAGInfo.h +++ b/llvm/lib/Target/BPF/BPFSelectionDAGInfo.h @@ -15,10 +15,15 @@ #include "llvm/CodeGen/SelectionDAGTargetInfo.h" +#define GET_SDNODE_ENUM +#include "BPFGenSDNodeInfo.inc" + namespace llvm { -class BPFSelectionDAGInfo : public SelectionDAGTargetInfo { +class BPFSelectionDAGInfo : public SelectionDAGGenTargetInfo { public: + BPFSelectionDAGInfo(); + SDValue 
EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, @@ -27,9 +32,8 @@ public: MachinePointerInfo SrcPtrInfo) const override; unsigned getCommonMaxStoresPerMemFunc() const { return 128; } - }; -} +} // namespace llvm #endif diff --git a/llvm/lib/Target/BPF/CMakeLists.txt b/llvm/lib/Target/BPF/CMakeLists.txt index 3678f13..fa539a0 100644 --- a/llvm/lib/Target/BPF/CMakeLists.txt +++ b/llvm/lib/Target/BPF/CMakeLists.txt @@ -10,6 +10,7 @@ tablegen(LLVM BPFGenDisassemblerTables.inc -gen-disassembler) tablegen(LLVM BPFGenInstrInfo.inc -gen-instr-info) tablegen(LLVM BPFGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM BPFGenRegisterInfo.inc -gen-register-info) +tablegen(LLVM BPFGenSDNodeInfo.inc -gen-sd-node-info) tablegen(LLVM BPFGenSubtargetInfo.inc -gen-subtarget) tablegen(LLVM BPFGenGlobalISel.inc -gen-global-isel) tablegen(LLVM BPFGenRegisterBank.inc -gen-register-bank) diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp index d507d71..9f1616f 100644 --- a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp +++ b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp @@ -304,40 +304,76 @@ bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) { GEPOperator *GOp = cast<GEPOperator>(&GEPI); Value *PtrOperand = GOp->getPointerOperand(); Type *NewGEPType = GOp->getSourceElementType(); - bool NeedsTransform = false; // Unwrap GEP ConstantExprs to find the base operand and element type - while (auto *CE = dyn_cast<ConstantExpr>(PtrOperand)) { - if (auto *GEPCE = dyn_cast<GEPOperator>(CE)) { - GOp = GEPCE; - PtrOperand = GEPCE->getPointerOperand(); - NewGEPType = GEPCE->getSourceElementType(); - } else - break; + while (auto *GEPCE = dyn_cast_or_null<GEPOperator>( + dyn_cast<ConstantExpr>(PtrOperand))) { + GOp = GEPCE; + PtrOperand = GEPCE->getPointerOperand(); + NewGEPType = GEPCE->getSourceElementType(); } + Type *const OrigGEPType = NewGEPType; + Value *const OrigOperand = PtrOperand; + if (GlobalVariable *NewGlobal = lookupReplacementGlobal(PtrOperand)) { NewGEPType = NewGlobal->getValueType(); PtrOperand = NewGlobal; - NeedsTransform = true; } else if (AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrOperand)) { Type *AllocatedType = Alloca->getAllocatedType(); if (isa<ArrayType>(AllocatedType) && - AllocatedType != GOp->getResultElementType()) { + AllocatedType != GOp->getResultElementType()) NewGEPType = AllocatedType; - NeedsTransform = true; + } else + return false; // Only GEPs into an alloca or global variable are considered + + // Defer changing i8 GEP types until dxil-flatten-arrays + if (OrigGEPType->isIntegerTy(8)) + NewGEPType = OrigGEPType; + + // If the original type is a "sub-type" of the new type, then ensure the gep + // correctly zero-indexes the extra dimensions to keep the offset calculation + // correct. + // Eg: + // i32, [4 x i32] and [8 x [4 x i32]] are sub-types of [8 x [4 x i32]], etc. + // + // So then: + // gep [4 x i32] %idx + // -> gep [8 x [4 x i32]], i32 0, i32 %idx + // gep i32 %idx + // -> gep [8 x [4 x i32]], i32 0, i32 0, i32 %idx + uint32_t MissingDims = 0; + Type *SubType = NewGEPType; + + // The new type will be in its array version; so match accordingly. 
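// Worked instance of the dimension walk below (types illustrative):
// with OrigGEPType = [4 x i32] and a replacement global of type
// [8 x [4 x i32]], one peel of the outer array reaches the match, so
// MissingDims == 1 and one leading zero index is prepended:
//   gep [4 x i32], ptr %g, i32 %i
//     -> gep [8 x [4 x i32]], ptr %g.scalarized, i32 0, i32 %i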
+ Type *const GEPArrType = equivalentArrayTypeFromVector(OrigGEPType); + + while (SubType != GEPArrType) { + MissingDims++; + + ArrayType *ArrType = dyn_cast<ArrayType>(SubType); + if (!ArrType) { + assert(SubType == GEPArrType && + "GEP uses an DXIL invalid sub-type of alloca/global variable"); + break; } + + SubType = ArrType->getElementType(); } + bool NeedsTransform = OrigOperand != PtrOperand || + OrigGEPType != NewGEPType || MissingDims != 0; + if (!NeedsTransform) return false; - // Keep scalar GEPs scalar; dxil-flatten-arrays will do flattening later - if (!isa<ArrayType>(GOp->getSourceElementType())) - NewGEPType = GOp->getSourceElementType(); - IRBuilder<> Builder(&GEPI); - SmallVector<Value *, MaxVecSize> Indices(GOp->indices()); + SmallVector<Value *, MaxVecSize> Indices; + + for (uint32_t I = 0; I < MissingDims; I++) + Indices.push_back(Builder.getInt32(0)); + llvm::append_range(Indices, GOp->indices()); + Value *NewGEP = Builder.CreateGEP(NewGEPType, PtrOperand, Indices, GOp->getName(), GOp->getNoWrapFlags()); diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp index ebb7c26..e0d2dbd 100644 --- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp +++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp @@ -197,6 +197,7 @@ static Value *expand16BitIsNormal(CallInst *Orig) { static bool isIntrinsicExpansion(Function &F) { switch (F.getIntrinsicID()) { + case Intrinsic::assume: case Intrinsic::abs: case Intrinsic::atan2: case Intrinsic::exp: @@ -988,6 +989,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) { case Intrinsic::abs: Result = expandAbs(Orig); break; + case Intrinsic::assume: + Orig->eraseFromParent(); + return true; case Intrinsic::atan2: Result = expandAtan2Intrinsic(Orig); break; diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp index 8720460..e46a393 100644 --- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp +++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp @@ -904,8 +904,6 @@ public: case Intrinsic::dx_resource_casthandle: // NOTE: llvm.dbg.value is supported as is in DXIL. case Intrinsic::dbg_value: - // NOTE: llvm.assume is supported as is in DXIL. - case Intrinsic::assume: case Intrinsic::not_intrinsic: if (F.use_empty()) F.eraseFromParent(); diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index 47726d6..55bafde 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -4753,6 +4753,19 @@ bool HexagonInstrInfo::getBundleNoShuf(const MachineInstr &MIB) const { return (Operand.isImm() && (Operand.getImm() & memShufDisabledMask) != 0); } +bool HexagonInstrInfo::isQFPMul(const MachineInstr *MI) const { + return (MI->getOpcode() == Hexagon::V6_vmpy_qf16_hf || + MI->getOpcode() == Hexagon::V6_vmpy_qf16_mix_hf || + MI->getOpcode() == Hexagon::V6_vmpy_qf32_hf || + MI->getOpcode() == Hexagon::V6_vmpy_qf32_mix_hf || + MI->getOpcode() == Hexagon::V6_vmpy_qf32_sf || + MI->getOpcode() == Hexagon::V6_vmpy_qf16_mix_hf || + MI->getOpcode() == Hexagon::V6_vmpy_qf16 || + MI->getOpcode() == Hexagon::V6_vmpy_qf32_mix_hf || + MI->getOpcode() == Hexagon::V6_vmpy_qf32_qf16 || + MI->getOpcode() == Hexagon::V6_vmpy_qf32); +} + // Addressing mode relations. short HexagonInstrInfo::changeAddrMode_abs_io(short Opc) const { return Opc >= 0 ? 
Hexagon::changeAddrMode_abs_io(Opc) : Opc; diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h index c17e527..48adf82 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h @@ -532,6 +532,7 @@ public: } MCInst getNop() const override; + bool isQFPMul(const MachineInstr *MF) const; }; /// \brief Create RegSubRegPair from a register MachineOperand diff --git a/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp b/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp index f29a739..8801f69 100644 --- a/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp +++ b/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp @@ -58,7 +58,7 @@ // are PHI inst. // //===----------------------------------------------------------------------===// -#include <unordered_set> + #define HEXAGON_QFP_OPTIMIZER "QFP optimizer pass" #include "Hexagon.h" @@ -86,6 +86,9 @@ using namespace llvm; cl::opt<bool> DisableQFOptimizer("disable-qfp-opt", cl::init(false), cl::desc("Disable optimization of Qfloat operations.")); +cl::opt<bool> DisableQFOptForMul( + "disable-qfp-opt-mul", cl::init(true), + cl::desc("Disable optimization of Qfloat operations for multiply.")); namespace { const std::map<unsigned short, unsigned short> QFPInstMap{ @@ -101,11 +104,21 @@ const std::map<unsigned short, unsigned short> QFPInstMap{ {Hexagon::V6_vmpy_qf16_mix_hf, Hexagon::V6_vmpy_qf16}, {Hexagon::V6_vmpy_qf32_hf, Hexagon::V6_vmpy_qf32_mix_hf}, {Hexagon::V6_vmpy_qf32_mix_hf, Hexagon::V6_vmpy_qf32_qf16}, - {Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32}}; + {Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32}, + {Hexagon::V6_vilog2_sf, Hexagon::V6_vilog2_qf32}, + {Hexagon::V6_vilog2_hf, Hexagon::V6_vilog2_qf16}, + {Hexagon::V6_vabs_qf32_sf, Hexagon::V6_vabs_qf32_qf32}, + {Hexagon::V6_vabs_qf16_hf, Hexagon::V6_vabs_qf16_qf16}, + {Hexagon::V6_vneg_qf32_sf, Hexagon::V6_vneg_qf32_qf32}, + {Hexagon::V6_vneg_qf16_hf, Hexagon::V6_vneg_qf16_qf16}}; } // namespace -namespace { +namespace llvm { +FunctionPass *createHexagonQFPOptimizer(); +void initializeHexagonQFPOptimizerPass(PassRegistry &); +} // namespace llvm +namespace { struct HexagonQFPOptimizer : public MachineFunctionPass { public: static char ID; @@ -116,6 +129,10 @@ public: bool optimizeQfp(MachineInstr *MI, MachineBasicBlock *MBB); + bool optimizeQfpTwoOp(MachineInstr *MI, MachineBasicBlock *MBB); + + bool optimizeQfpOneOp(MachineInstr *MI, MachineBasicBlock *MBB); + StringRef getPassName() const override { return HEXAGON_QFP_OPTIMIZER; } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -142,19 +159,69 @@ FunctionPass *llvm::createHexagonQFPOptimizer() { bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI, MachineBasicBlock *MBB) { - // Early exit: - // - if instruction is invalid or has too few operands (QFP ops need 2 sources - // + 1 dest), - // - or does not have a transformation mapping. 
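// Shape of the rewrite this pass performs, schematically (opcode names
// abbreviated; the exact pairs come from QFPInstMap):
//   %t = V6_vconv_sf_qf32 %a        ; qf32 -> sf conversion
//   %r = <sf arithmetic> %t, %b
// becomes
//   %r = <qf32 mix arithmetic> %a, %b
// letting the intermediate conversion be erased once it has no other
// users.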
- if (MI->getNumOperands() < 3) + if (MI->getNumOperands() == 2) + return optimizeQfpOneOp(MI, MBB); + else if (MI->getNumOperands() == 3) + return optimizeQfpTwoOp(MI, MBB); + else return false; +} + +bool HexagonQFPOptimizer::optimizeQfpOneOp(MachineInstr *MI, + MachineBasicBlock *MBB) { + + unsigned Op0F = 0; auto It = QFPInstMap.find(MI->getOpcode()); if (It == QFPInstMap.end()) return false; + unsigned short InstTy = It->second; + // Get the reachind defs of MI + MachineInstr *DefMI = MRI->getVRegDef(MI->getOperand(1).getReg()); + MachineOperand &Res = MI->getOperand(0); + if (!Res.isReg()) + return false; + + LLVM_DEBUG(dbgs() << "\n[Reaching Defs of operands]: "; DefMI->dump()); + MachineInstr *ReachDefDef = nullptr; + + // Get the reaching def of the reaching def to check for W reg def + if (DefMI->getNumOperands() > 1 && DefMI->getOperand(1).isReg() && + DefMI->getOperand(1).getReg().isVirtual()) + ReachDefDef = MRI->getVRegDef(DefMI->getOperand(1).getReg()); + unsigned ReachDefOp = DefMI->getOpcode(); + MachineInstrBuilder MIB; + + // Check if the reaching def is a conversion + if (ReachDefOp == Hexagon::V6_vconv_sf_qf32 || + ReachDefOp == Hexagon::V6_vconv_hf_qf16) { + + // Return if the reaching def of reaching def is W type + if (ReachDefDef && MRI->getRegClass(ReachDefDef->getOperand(0).getReg()) == + &Hexagon::HvxWRRegClass) + return false; + + // Analyze the use operands of the conversion to get their KILL status + MachineOperand &SrcOp = DefMI->getOperand(1); + Op0F = getKillRegState(SrcOp.isKill()); + SrcOp.setIsKill(false); + MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg()) + .addReg(SrcOp.getReg(), Op0F, SrcOp.getSubReg()); + LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump()); + return true; + } + return false; +} + +bool HexagonQFPOptimizer::optimizeQfpTwoOp(MachineInstr *MI, + MachineBasicBlock *MBB) { unsigned Op0F = 0; unsigned Op1F = 0; + auto It = QFPInstMap.find(MI->getOpcode()); + if (It == QFPInstMap.end()) + return false; + unsigned short InstTy = It->second; // Get the reaching defs of MI, DefMI1 and DefMI2 MachineInstr *DefMI1 = nullptr; MachineInstr *DefMI2 = nullptr; @@ -167,6 +234,9 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI, return false; MachineOperand &Res = MI->getOperand(0); + if (!Res.isReg()) + return false; + MachineInstr *Inst1 = nullptr; MachineInstr *Inst2 = nullptr; LLVM_DEBUG(dbgs() << "\n[Reaching Defs of operands]: "; DefMI1->dump(); @@ -185,7 +255,8 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI, unsigned Def2OP = DefMI2->getOpcode(); MachineInstrBuilder MIB; - // Case 1: Both reaching defs of MI are qf to sf/hf conversions + + // Check if the both the reaching defs of MI are qf to sf/hf conversions if ((Def1OP == Hexagon::V6_vconv_sf_qf32 && Def2OP == Hexagon::V6_vconv_sf_qf32) || (Def1OP == Hexagon::V6_vconv_hf_qf16 && @@ -226,7 +297,7 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI, LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump()); return true; - // Case 2: Left operand is conversion to sf/hf + // Check if left operand's reaching def is a conversion to sf/hf } else if (((Def1OP == Hexagon::V6_vconv_sf_qf32 && Def2OP != Hexagon::V6_vconv_sf_qf32) || (Def1OP == Hexagon::V6_vconv_hf_qf16 && @@ -250,7 +321,7 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI, LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump()); return true; - // Case 2: Left operand is conversion to sf/hf + // Check if right operand's reaching def is a conversion to 
sf/hf } else if (((Def1OP != Hexagon::V6_vconv_sf_qf32 && Def2OP == Hexagon::V6_vconv_sf_qf32) || (Def1OP != Hexagon::V6_vconv_hf_qf16 && @@ -258,13 +329,6 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI, !DefMI1->isPHI() && (MI->getOpcode() != Hexagon::V6_vmpy_qf32_sf)) { // The second operand of original instruction is converted. - // In "mix" instructions, "qf" operand is always the first operand. - - // Caveat: vsub is not commutative w.r.t operands. - if (InstTy == Hexagon::V6_vsub_qf16_mix || - InstTy == Hexagon::V6_vsub_qf32_mix) - return false; - if (Inst2 && MRI->getRegClass(Inst2->getOperand(0).getReg()) == &Hexagon::HvxWRRegClass) return false; @@ -275,10 +339,26 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI, Op1F = getKillRegState(Src2.isKill()); Src2.setIsKill(false); Op0F = getKillRegState(Src1.isKill()); - MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg()) - .addReg(Src2.getReg(), Op1F, - Src2.getSubReg()) // Notice the operands are flipped. - .addReg(Src1.getReg(), Op0F, Src1.getSubReg()); + if (InstTy == Hexagon::V6_vsub_qf16_mix || + InstTy == Hexagon::V6_vsub_qf32_mix) { + if (!HST->useHVXV81Ops()) + // vsub_(hf|sf)_mix insts are only avlbl on hvx81+ + return false; + // vsub is not commutative w.r.t. operands -> treat it as a special case + // to choose the correct mix instruction. + if (Def2OP == Hexagon::V6_vconv_sf_qf32) + InstTy = Hexagon::V6_vsub_sf_mix; + else if (Def2OP == Hexagon::V6_vconv_hf_qf16) + InstTy = Hexagon::V6_vsub_hf_mix; + MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg()) + .addReg(Src1.getReg(), Op0F, Src1.getSubReg()) + .addReg(Src2.getReg(), Op1F, Src2.getSubReg()); + } else { + MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg()) + .addReg(Src2.getReg(), Op1F, + Src2.getSubReg()) // Notice the operands are flipped. + .addReg(Src1.getReg(), Op0F, Src1.getSubReg()); + } LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump()); return true; } @@ -309,15 +389,18 @@ bool HexagonQFPOptimizer::runOnMachineFunction(MachineFunction &MF) { while (MII != MBBI->instr_end()) { MachineInstr *MI = &*MII; ++MII; // As MI might be removed. 
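// The pre-increment above is the usual erase-safe iteration pattern:
//   for (auto It = MBB.instr_begin(); It != MBB.instr_end();) {
//     MachineInstr &MI = *It++;   // advance first...
//     if (rewriteSucceeded(MI))   // (predicate illustrative)
//       MI.eraseFromParent();     // ...so erasing MI leaves It valid
//   }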
- - if (QFPInstMap.count(MI->getOpcode()) && - MI->getOpcode() != Hexagon::V6_vconv_sf_qf32 && - MI->getOpcode() != Hexagon::V6_vconv_hf_qf16) { - LLVM_DEBUG(dbgs() << "\n###Analyzing for removal: "; MI->dump()); - if (optimizeQfp(MI, MBB)) { - MI->eraseFromParent(); - LLVM_DEBUG(dbgs() << "\t....Removing...."); - Changed = true; + if (QFPInstMap.count(MI->getOpcode())) { + auto OpC = MI->getOpcode(); + if (DisableQFOptForMul && HII->isQFPMul(MI)) + continue; + if (OpC != Hexagon::V6_vconv_sf_qf32 && + OpC != Hexagon::V6_vconv_hf_qf16) { + LLVM_DEBUG(dbgs() << "\n###Analyzing for removal: "; MI->dump()); + if (optimizeQfp(MI, MBB)) { + MI->eraseFromParent(); + LLVM_DEBUG(dbgs() << "\t....Removing...."); + Changed = true; + } } } } diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 2f1a7ad..a3deb36 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -305,7 +305,8 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL, uint64_t StartingOffset = 0) { SmallVector<EVT, 16> TempVTs; SmallVector<uint64_t, 16> TempOffsets; - ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset); + ComputeValueVTs(TLI, DL, Ty, TempVTs, /*MemVTs=*/nullptr, &TempOffsets, + StartingOffset); for (const auto [VT, Off] : zip(TempVTs, TempOffsets)) { MVT RegisterVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT); diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 780e124..122738c 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -2750,6 +2750,10 @@ void PPCAIXAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) { if (isSpecialLLVMGlobalArrayToSkip(GV) || isSpecialLLVMGlobalArrayForStaticInit(GV)) return; + // Ignore non-emitted data. + if (GV->getSection() == "llvm.metadata") + return; + // If the Global Variable has the toc-data attribute, it needs to be emitted // when we emit the .toc section. if (GV->hasAttribute("toc-data")) { diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index b37b740..f881c4c 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -789,6 +789,8 @@ void RISCVFrameLowering::allocateStack(MachineBasicBlock &MBB, // Unroll the probe loop depending on the number of iterations. 
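// Worked example for the CFA fix below (numbers assumed): with
// RealStackSize = 5120, a probed Offset = 4096, and ProbeSize = 1024,
// CFAAdjust = 5120 - 4096 = 1024 covers the stack already allocated
// before the probe loop. After probe k the CFA sits k * 1024 + 1024
// bytes above SP, so the directive must record
// CurrentOffset + CFAAdjust (2048 after the first probe) rather than
// the bare CurrentOffset, and the residual step lands on RealStackSize.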
if (Offset < ProbeSize * 5) { + uint64_t CFAAdjust = RealStackSize - Offset; + uint64_t CurrentOffset = 0; while (CurrentOffset + ProbeSize <= Offset) { RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, @@ -802,7 +804,7 @@ void RISCVFrameLowering::allocateStack(MachineBasicBlock &MBB, CurrentOffset += ProbeSize; if (EmitCFI) - CFIBuilder.buildDefCFAOffset(CurrentOffset); + CFIBuilder.buildDefCFAOffset(CurrentOffset + CFAAdjust); } uint64_t Residual = Offset - CurrentOffset; @@ -810,7 +812,7 @@ void RISCVFrameLowering::allocateStack(MachineBasicBlock &MBB, RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(-Residual), Flag, getStackAlign()); if (EmitCFI) - CFIBuilder.buildDefCFAOffset(Offset); + CFIBuilder.buildDefCFAOffset(RealStackSize); if (DynAllocation) { // s[d|w] zero, 0(sp) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 995ae75..3b69eda 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -17867,6 +17867,7 @@ static SDValue combineOp_VLToVWOp_VL(SDNode *N, SmallVector<SDNode *> Worklist; SmallPtrSet<SDNode *, 8> Inserted; + SmallPtrSet<SDNode *, 8> ExtensionsToRemove; Worklist.push_back(N); Inserted.insert(N); SmallVector<CombineResult> CombinesToApply; @@ -17876,22 +17877,25 @@ static SDValue combineOp_VLToVWOp_VL(SDNode *N, NodeExtensionHelper LHS(Root, 0, DAG, Subtarget); NodeExtensionHelper RHS(Root, 1, DAG, Subtarget); - auto AppendUsersIfNeeded = [&Worklist, &Subtarget, - &Inserted](const NodeExtensionHelper &Op) { - if (Op.needToPromoteOtherUsers()) { - for (SDUse &Use : Op.OrigOperand->uses()) { - SDNode *TheUser = Use.getUser(); - if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget)) - return false; - // We only support the first 2 operands of FMA. - if (Use.getOperandNo() >= 2) - return false; - if (Inserted.insert(TheUser).second) - Worklist.push_back(TheUser); - } - } - return true; - }; + auto AppendUsersIfNeeded = + [&Worklist, &Subtarget, &Inserted, + &ExtensionsToRemove](const NodeExtensionHelper &Op) { + if (Op.needToPromoteOtherUsers()) { + // Remember that we're supposed to remove this extension. + ExtensionsToRemove.insert(Op.OrigOperand.getNode()); + for (SDUse &Use : Op.OrigOperand->uses()) { + SDNode *TheUser = Use.getUser(); + if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget)) + return false; + // We only support the first 2 operands of FMA. + if (Use.getOperandNo() >= 2) + return false; + if (Inserted.insert(TheUser).second) + Worklist.push_back(TheUser); + } + } + return true; + }; // Control the compile time by limiting the number of node we look at in // total. @@ -17912,6 +17916,15 @@ static SDValue combineOp_VLToVWOp_VL(SDNode *N, std::optional<CombineResult> Res = FoldingStrategy(Root, LHS, RHS, DAG, Subtarget); if (Res) { + // If this strategy wouldn't remove an extension we're supposed to + // remove, reject it. 
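// Concrete shape of the rejected case (node names illustrative):
//   ext = vsext_vl x, ...    ; needToPromoteOtherUsers() held, so ext
//                            ; was promised to disappear
//   u1  = vadd_vl ext, y     ; folds to vwadd_vl x, y  (ext consumed)
//   u2  = vmul_vl ext, z     ; a CombineResult here that leaves ext
//                            ; in place is now rejected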
+ if (!Res->LHSExt.has_value() && + ExtensionsToRemove.contains(LHS.OrigOperand.getNode())) + continue; + if (!Res->RHSExt.has_value() && + ExtensionsToRemove.contains(RHS.OrigOperand.getNode())) + continue; + Matched = true; CombinesToApply.push_back(*Res); // All the inputs that are extended need to be folded, otherwise diff --git a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td index 24ebbc3..41071b2 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td @@ -654,8 +654,17 @@ foreach mx = SchedMxList in { foreach sew = SchedSEWSet<mx>.val in { defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxList>.c; - defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From", [SMX60_VIEU], mx, sew, IsWorstCase>; - defm "" : LMULSEWWriteResMXSEW<"WriteVIRedMinMaxV_From", [SMX60_VIEU], mx, sew, IsWorstCase>; + defvar VIRedLat = GetLMULValue<[5, 5, 5, 7, 11, 19, 35], mx>.c; + defvar VIRedOcc = GetLMULValue<[1, 1, 2, 2, 4, 10, 35], mx>.c; + let Latency = VIRedLat, ReleaseAtCycles = [VIRedOcc] in { + defm "" : LMULSEWWriteResMXSEW<"WriteVIRedMinMaxV_From", [SMX60_VIEU], mx, sew, IsWorstCase>; + + // Pattern for vredsum: 5/5/5/7/11/19/35 + // Pattern for vredand, vredor, vredxor: 4/4/4/6/10/18/34 + // They are grouped together, so we use the worst-case vredsum latency. + // TODO: split vredand, vredor, vredxor into separate scheduling classe. + defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From", [SMX60_VIEU], mx, sew, IsWorstCase>; + } } } @@ -663,7 +672,27 @@ foreach mx = SchedMxListWRed in { foreach sew = SchedSEWSet<mx, 0, 1>.val in { defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListWRed>.c; - defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SMX60_VIEU], mx, sew, IsWorstCase>; + defvar VIRedLat = GetLMULValue<[5, 5, 5, 7, 11, 19, 35], mx>.c; + defvar VIRedOcc = GetLMULValue<[1, 1, 2, 2, 4, 10, 35], mx>.c; + let Latency = VIRedLat, ReleaseAtCycles = [VIRedOcc] in { + defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SMX60_VIEU], mx, sew, IsWorstCase>; + } + } +} + +foreach mx = SchedMxListF in { + foreach sew = SchedSEWSet<mx, 1>.val in { + defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c; + + // Latency for vfredmax.vs, vfredmin.vs: 12/12/15/21/33/57 + // Latency for vfredusum.vs is slightly lower for e16/e32 + // We use the worst-case + defvar VFRedLat = GetLMULValue<[12, 12, 12, 15, 21, 33, 57], mx>.c; + defvar VFRedOcc = GetLMULValue<[8, 8, 8, 8, 14, 20, 57], mx>.c; + let Latency = VFRedLat, ReleaseAtCycles = [VFRedOcc] in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [SMX60_VFP], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From", [SMX60_VFP], mx, sew, IsWorstCase>; + } } } @@ -671,9 +700,20 @@ foreach mx = SchedMxListF in { foreach sew = SchedSEWSet<mx, 1>.val in { defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c; - defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [SMX60_VFP], mx, sew, IsWorstCase>; - defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SMX60_VFP], mx, sew, IsWorstCase>; - defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From", [SMX60_VFP], mx, sew, IsWorstCase>; + // Compute latency based on SEW + defvar VFRedOV_FromLat = !cond( + !eq(sew, 16) : ConstValueUntilLMULThenDouble<"MF4", 12, mx>.c, + !eq(sew, 32) : ConstValueUntilLMULThenDouble<"MF2", 12, mx>.c, + !eq(sew, 64) : ConstValueUntilLMULThenDouble<"M1", 12, mx>.c + ); + defvar VFRedOV_FromOcc = !cond( + !eq(sew, 16) : 
GetLMULValue<[8, 8, 20, 24, 48, 96, 384], mx>.c, + !eq(sew, 32) : GetLMULValue<[8, 8, 8, 12, 24, 48, 192], mx>.c, + !eq(sew, 64) : GetLMULValue<[6, 6, 6, 6, 12, 24, 96], mx>.c + ); + let Latency = VFRedOV_FromLat, ReleaseAtCycles = [VFRedOV_FromOcc] in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SMX60_VFP], mx, sew, IsWorstCase>; + } } } @@ -681,8 +721,18 @@ foreach mx = SchedMxListFWRed in { foreach sew = SchedSEWSet<mx, 1, 1>.val in { defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListFWRed, 1>.c; - defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SMX60_VFP], mx, sew, IsWorstCase>; - defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SMX60_VFP], mx, sew, IsWorstCase>; + defvar VFRedOVLat = !cond( + !eq(sew, 16) : ConstValueUntilLMULThenDouble<"MF4", 16, mx>.c, + !eq(sew, 32) : ConstValueUntilLMULThenDouble<"MF2", 16, mx>.c, + ); + defvar VFRedOVOcc = !cond( + !eq(sew, 16) : GetLMULValue<[11, 11, 27, 32, 64, 128, 512], mx>.c, + !eq(sew, 32) : GetLMULValue<[11, 11, 11, 16, 32, 64, 256], mx>.c, + ); + let Latency = VFRedOVLat, ReleaseAtCycles = [VFRedOVOcc] in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SMX60_VFP], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SMX60_VFP], mx, sew, IsWorstCase>; + } } } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 168e041..d103953 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -53354,6 +53354,7 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG, // i32 sub value. static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { using namespace SDPatternMatch; SDValue StoredVal = St->getValue(); @@ -53451,6 +53452,8 @@ static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL, if (!StoredVal.hasOneUse()) { SDValue NewLoad = DAG.getLoad(VT, DL, NewStore, Ld->getBasePtr(), Ld->getMemOperand()); + for (SDNode *User : StoredVal->users()) + DCI.AddToWorklist(User); DAG.ReplaceAllUsesWith(StoredVal, NewLoad); } return NewStore; @@ -53682,7 +53685,7 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, } } - if (SDValue R = narrowBitOpRMW(St, dl, DAG, Subtarget)) + if (SDValue R = narrowBitOpRMW(St, dl, DAG, DCI, Subtarget)) return R; // Convert store(cmov(load(p), x, CC), p) to cstore(x, p, CC) diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp index 975a271..96bef0e 100644 --- a/llvm/lib/TargetParser/TargetParser.cpp +++ b/llvm/lib/TargetParser/TargetParser.cpp @@ -174,8 +174,8 @@ constexpr GPUInfo AMDGCNGPUs[] = { {{"gfx1153"}, {"gfx1153"}, GK_GFX1153, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, {{"gfx1200"}, {"gfx1200"}, GK_GFX1200, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, {{"gfx1201"}, {"gfx1201"}, GK_GFX1201, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, - {{"gfx1250"}, {"gfx1250"}, GK_GFX1250, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32}, - {{"gfx1251"}, {"gfx1251"}, GK_GFX1251, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32}, + {{"gfx1250"}, {"gfx1250"}, GK_GFX1250, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK_ALWAYS}, + {{"gfx1251"}, {"gfx1251"}, GK_GFX1251, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK_ALWAYS}, 
{{"gfx9-generic"}, {"gfx9-generic"}, GK_GFX9_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, {{"gfx10-1-generic"}, {"gfx10-1-generic"}, GK_GFX10_1_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP}, diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index af53fa0..02f06be 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -734,7 +734,7 @@ void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC(); // Reserve bit 60-63 for other information purpose. - FunctionHash &= 0x0FFFFFFFFFFFFFFF; + FunctionHash &= NamedInstrProfRecord::FUNC_HASH_MASK; if (IsCS) NamedInstrProfRecord::setCSFlagInHash(FunctionHash); LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n" diff --git a/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp index 42b1fdf..8aa8aa2 100644 --- a/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -39,36 +39,36 @@ using namespace llvm; STATISTIC(NumBroken, "Number of blocks inserted"); namespace { - struct BreakCriticalEdges : public FunctionPass { - static char ID; // Pass identification, replacement for typeid - BreakCriticalEdges() : FunctionPass(ID) { - initializeBreakCriticalEdgesPass(*PassRegistry::getPassRegistry()); - } +struct BreakCriticalEdges : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + BreakCriticalEdges() : FunctionPass(ID) { + initializeBreakCriticalEdgesPass(*PassRegistry::getPassRegistry()); + } - bool runOnFunction(Function &F) override { - auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); - auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; + bool runOnFunction(Function &F) override { + auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; - auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>(); - auto *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr; + auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>(); + auto *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr; - auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); - auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; - unsigned N = - SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(DT, LI, nullptr, PDT)); - NumBroken += N; - return N > 0; - } + auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); + auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; + unsigned N = SplitAllCriticalEdges( + F, CriticalEdgeSplittingOptions(DT, LI, nullptr, PDT)); + NumBroken += N; + return N > 0; + } - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addPreserved<DominatorTreeWrapperPass>(); - AU.addPreserved<LoopInfoWrapperPass>(); + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addPreserved<LoopInfoWrapperPass>(); - // No loop canonicalization guarantees are broken by this pass. - AU.addPreservedID(LoopSimplifyID); - } - }; -} + // No loop canonicalization guarantees are broken by this pass. 
+ AU.addPreservedID(LoopSimplifyID); + } +}; +} // namespace char BreakCriticalEdges::ID = 0; INITIALIZE_PASS(BreakCriticalEdges, "break-crit-edges", @@ -76,6 +76,7 @@ INITIALIZE_PASS(BreakCriticalEdges, "break-crit-edges", // Publicly exposed interface to pass... char &llvm::BreakCriticalEdgesID = BreakCriticalEdges::ID; + FunctionPass *llvm::createBreakCriticalEdgesPass() { return new BreakCriticalEdges(); } diff --git a/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp b/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp index 7343c79..9f6d89e 100644 --- a/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -40,22 +40,22 @@ using namespace llvm; namespace { - struct QuotRemPair { - Value *Quotient; - Value *Remainder; - - QuotRemPair(Value *InQuotient, Value *InRemainder) - : Quotient(InQuotient), Remainder(InRemainder) {} - }; - - /// A quotient and remainder, plus a BB from which they logically "originate". - /// If you use Quotient or Remainder in a Phi node, you should use BB as its - /// corresponding predecessor. - struct QuotRemWithBB { - BasicBlock *BB = nullptr; - Value *Quotient = nullptr; - Value *Remainder = nullptr; - }; +struct QuotRemPair { + Value *Quotient; + Value *Remainder; + + QuotRemPair(Value *InQuotient, Value *InRemainder) + : Quotient(InQuotient), Remainder(InRemainder) {} +}; + +/// A quotient and remainder, plus a BB from which they logically "originate". +/// If you use Quotient or Remainder in a Phi node, you should use BB as its +/// corresponding predecessor. +struct QuotRemWithBB { + BasicBlock *BB = nullptr; + Value *Quotient = nullptr; + Value *Remainder = nullptr; +}; using DivCacheTy = DenseMap<DivRemMapKey, QuotRemPair>; using BypassWidthsTy = DenseMap<unsigned, unsigned>; diff --git a/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/llvm/lib/Transforms/Utils/LoopSimplify.cpp index 61ffb49..8da6a980 100644 --- a/llvm/lib/Transforms/Utils/LoopSimplify.cpp +++ b/llvm/lib/Transforms/Utils/LoopSimplify.cpp @@ -378,7 +378,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, if (P != Preheader) BackedgeBlocks.push_back(P); } - // Create and insert the new backedge block... + // Create and insert the new backedge block. BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(), Header->getName() + ".backedge", F); BranchInst *BETerminator = BranchInst::Create(Header, BEBlock); @@ -737,39 +737,39 @@ bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, } namespace { - struct LoopSimplify : public FunctionPass { - static char ID; // Pass identification, replacement for typeid - LoopSimplify() : FunctionPass(ID) { - initializeLoopSimplifyPass(*PassRegistry::getPassRegistry()); - } +struct LoopSimplify : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + LoopSimplify() : FunctionPass(ID) { + initializeLoopSimplifyPass(*PassRegistry::getPassRegistry()); + } - bool runOnFunction(Function &F) override; + bool runOnFunction(Function &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<AssumptionCacheTracker>(); + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<AssumptionCacheTracker>(); - // We need loop information to identify the loops... - AU.addRequired<DominatorTreeWrapperPass>(); - AU.addPreserved<DominatorTreeWrapperPass>(); + // We need loop information to identify the loops. 
+ AU.addRequired<DominatorTreeWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); - AU.addRequired<LoopInfoWrapperPass>(); - AU.addPreserved<LoopInfoWrapperPass>(); + AU.addRequired<LoopInfoWrapperPass>(); + AU.addPreserved<LoopInfoWrapperPass>(); - AU.addPreserved<BasicAAWrapperPass>(); - AU.addPreserved<AAResultsWrapperPass>(); - AU.addPreserved<GlobalsAAWrapperPass>(); - AU.addPreserved<ScalarEvolutionWrapperPass>(); - AU.addPreserved<SCEVAAWrapperPass>(); - AU.addPreservedID(LCSSAID); - AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added. - AU.addPreserved<BranchProbabilityInfoWrapperPass>(); - AU.addPreserved<MemorySSAWrapperPass>(); - } + AU.addPreserved<BasicAAWrapperPass>(); + AU.addPreserved<AAResultsWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); + AU.addPreserved<ScalarEvolutionWrapperPass>(); + AU.addPreserved<SCEVAAWrapperPass>(); + AU.addPreservedID(LCSSAID); + AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added. + AU.addPreserved<BranchProbabilityInfoWrapperPass>(); + AU.addPreserved<MemorySSAWrapperPass>(); + } - /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees. - void verifyAnalysis() const override; - }; -} + /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees. + void verifyAnalysis() const override; +}; +} // namespace char LoopSimplify::ID = 0; INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify", @@ -780,12 +780,12 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_END(LoopSimplify, "loop-simplify", "Canonicalize natural loops", false, false) -// Publicly exposed interface to pass... +// Publicly exposed interface to pass. char &llvm::LoopSimplifyID = LoopSimplify::ID; Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); } /// runOnFunction - Run down all loops in the CFG (recursively, but we could do -/// it in any convenient order) inserting preheaders... +/// it in any convenient order) inserting preheaders. /// bool LoopSimplify::runOnFunction(Function &F) { bool Changed = false; diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 906fa2f..b7224a3 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7933,6 +7933,26 @@ void VPRecipeBuilder::collectScaledReductions(VFRange &Range) { (!Chain.ExtendB || ExtendIsOnlyUsedByPartialReductions(Chain.ExtendB))) ScaledReductionMap.try_emplace(Chain.Reduction, Pair.second); } + + // Check that all partial reductions in a chain are only used by other + // partial reductions with the same scale factor. Otherwise we end up creating + // users of scaled reductions where the types of the other operands don't + // match. 
+ for (const auto &[Chain, Scale] : PartialReductionChains) {
+ auto AllUsersPartialRdx = [ScaleVal = Scale, this](const User *U) {
+ auto *UI = cast<Instruction>(U);
+ if (isa<PHINode>(UI) && UI->getParent() == OrigLoop->getHeader()) {
+ return all_of(UI->users(), [ScaleVal, this](const User *U) {
+ auto *UI = cast<Instruction>(U);
+ return ScaledReductionMap.lookup_or(UI, 0) == ScaleVal;
+ });
+ }
+ return ScaledReductionMap.lookup_or(UI, 0) == ScaleVal ||
+ !OrigLoop->contains(UI->getParent());
+ };
+ if (!all_of(Chain.Reduction->users(), AllUsersPartialRdx))
+ ScaledReductionMap.erase(Chain.Reduction);
+ }
 }
 bool VPRecipeBuilder::getScaledReductions(
@@ -8116,11 +8136,8 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
 if (isa<LoadInst>(Instr) || isa<StoreInst>(Instr))
 return tryToWidenMemory(Instr, Operands, Range);
- if (std::optional<unsigned> ScaleFactor = getScalingForReduction(Instr)) {
- if (auto PartialRed =
- tryToCreatePartialReduction(Instr, Operands, ScaleFactor.value()))
- return PartialRed;
- }
+ if (std::optional<unsigned> ScaleFactor = getScalingForReduction(Instr))
+ return tryToCreatePartialReduction(Instr, Operands, ScaleFactor.value());
 if (!shouldWiden(Instr, Range))
 return nullptr;
@@ -8154,9 +8171,9 @@ VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction,
 isa<VPPartialReductionRecipe>(BinOpRecipe))
 std::swap(BinOp, Accumulator);
- if (ScaleFactor !=
- vputils::getVFScaleFactor(Accumulator->getDefiningRecipe()))
- return nullptr;
+ assert(ScaleFactor ==
+ vputils::getVFScaleFactor(Accumulator->getDefiningRecipe()) &&
+ "all accumulators in chain must have same scale factor");
 unsigned ReductionOpcode = Reduction->getOpcode();
 if (ReductionOpcode == Instruction::Sub) {
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index bf3f52c..df835a0 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -20996,6 +20996,15 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
 return false;
 }))
 return std::nullopt;
+ if (S.areInstructionsWithCopyableElements() && EI && EI.UserTE->hasState() &&
+ EI.UserTE->hasCopyableElements() &&
+ EI.UserTE->getMainOp()->getParent() == S.getMainOp()->getParent() &&
+ all_of(VL, [&](Value *V) {
+ if (S.isCopyableElement(V))
+ return true;
+ return isUsedOutsideBlock(V);
+ }))
+ return std::nullopt;
 bool HasCopyables = S.areInstructionsWithCopyableElements();
 if (((!HasCopyables && doesNotNeedToSchedule(VL)) ||
 all_of(VL, [&](Value *V) { return S.isNonSchedulable(V); }))) {
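
The LoopVectorize hunks above encode an invariant rather than a late bail-out: after collectScaledReductions() has run, every in-loop user of a partial reduction in a chain carries the same VF scale factor, which is what lets tryToCreatePartialReduction() replace its early return with an assert. Below is a rough standalone sketch of that filtering step, assuming plain STL containers in place of the VPlan machinery; Node, Users, and pruneInconsistentChains are invented names, and the header-PHI and out-of-loop special cases of the real code are dropped.

#include <algorithm>
#include <unordered_map>
#include <vector>

// Hypothetical stand-in for an instruction in a partial-reduction chain;
// Users plays the role of Instruction::users().
struct Node {
  std::vector<Node *> Users;
};

// Drop every chain head whose users do not all carry the head's scale
// factor, mirroring the filtering loop added above: keeping such a chain
// would later build a partial reduction whose other operands have
// mismatched types.
void pruneInconsistentChains(std::unordered_map<Node *, unsigned> &ScaleMap) {
  std::vector<Node *> ToErase;
  for (const auto &Entry : ScaleMap) {
    Node *Head = Entry.first;
    unsigned Scale = Entry.second;
    bool AllMatch = std::all_of(
        Head->Users.begin(), Head->Users.end(), [&ScaleMap, Scale](Node *U) {
          auto It = ScaleMap.find(U);
          // Users absent from the map count as scale 0 upstream, so they
          // fail the check here as well.
          return It != ScaleMap.end() && It->second == Scale;
        });
    if (!AllMatch)
      ToErase.push_back(Head);
  }
  // Erase only after the scan so the map is not mutated while iterated.
  for (Node *N : ToErase)
    ScaleMap.erase(N);
}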
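
In the same spirit, the PGOInstrumentation hunk earlier in this diff swaps the literal 0x0FFFFFFFFFFFFFFF for the named NamedInstrProfRecord::FUNC_HASH_MASK. The idiom being named is reserving the top four bits of the 64-bit CFG hash for flags such as the context-sensitivity bit that setCSFlagInHash() sets. A minimal sketch of that scheme follows, with CSBit placed at bit 60 purely for illustration; the in-tree flag layout may differ.

#include <cstdint>

// Illustrative constants for the masking idiom in computeCFGHash(): the low
// 60 bits hold the real CFG hash and bits 60-63 are reserved for metadata
// flags. FuncHashMask matches the literal the patch replaces; CSBit is a
// hypothetical stand-in for the context-sensitive (CS) flag.
constexpr uint64_t FuncHashMask = 0x0FFFFFFFFFFFFFFFULL;
constexpr uint64_t CSBit = 1ULL << 60;

// Clear the reserved nibble, then tag context-sensitive profiles in-band.
uint64_t finalizeFunctionHash(uint64_t RawHash, bool IsCS) {
  uint64_t Hash = RawHash & FuncHashMask;
  if (IsCS)
    Hash |= CSBit;
  return Hash;
}

// The flag and the hash proper can be recovered independently later on.
bool isCSHash(uint64_t Hash) { return (Hash & CSBit) != 0; }
uint64_t hashWithoutFlags(uint64_t Hash) { return Hash & FuncHashMask; }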
