diff options
Diffstat (limited to 'llvm/lib')
69 files changed, 965 insertions, 604 deletions
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 371ad41..edbeede 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -1023,11 +1023,44 @@ static void computeKnownBitsFromOperator(const Operator *I, break; } case Instruction::Select: { - computeKnownBits(I->getOperand(2), Known, Depth + 1, Q); - computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); - + auto ComputeForArm = [&](Value *Arm, bool Invert) { + KnownBits Res(Known.getBitWidth()); + computeKnownBits(Arm, Res, Depth + 1, Q); + // If we have a constant arm, we are done. + if (Res.isConstant()) + return Res; + + // See what condition implies about the bits of the two select arms. + KnownBits CondRes(Res.getBitWidth()); + computeKnownBitsFromCond(Arm, I->getOperand(0), CondRes, Depth + 1, Q, + Invert); + // If we don't get any information from the condition, no reason to + // proceed. + if (CondRes.isUnknown()) + return Res; + + // We can have conflict if the condition is dead. I.e., if we have + // (x | 64) < 32 ? (x | 64) : y + // we will have conflict at bit 6 from the condition/the `or`. + // In that case just return. It's not particularly important + // what we do, as this select is going to be simplified soon. + CondRes = CondRes.unionWith(Res); + if (CondRes.hasConflict()) + return Res; + + // Finally make sure the information we found is valid. This is relatively + // expensive so it's left for the very end. + if (!isGuaranteedNotToBeUndef(Arm, Q.AC, Q.CxtI, Q.DT, Depth + 1)) + return Res; + + // Finally, we know we get information from the condition and it's valid, + // so return it. + return CondRes; + }; // Only known if known in both the LHS and RHS. 
- Known = Known.intersectWith(Known2); + Known = + ComputeForArm(I->getOperand(1), /*Invert=*/false) + .intersectWith(ComputeForArm(I->getOperand(2), /*Invert=*/true)); break; } case Instruction::FPTrunc: @@ -5709,7 +5742,7 @@ llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range, // looking for, then. if (*req_idx != *i) return FindInsertedValue(I->getAggregateOperand(), idx_range, - *InsertBefore); + InsertBefore); } // If we end up here, the indices of the insertvalue match with those // requested (though possibly only partially). Now we recursively look at diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp index 93fb2a8..0eb9c24 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp @@ -19,7 +19,7 @@ using namespace llvm; PreservedAnalyses BitcodeWriterPass::run(Module &M, ModuleAnalysisManager &AM) { - // RemoveDIs: there's no bitcode representation of the DPValue debug-info, + // RemoveDIs: there's no bitcode representation of the DbgRecord debug-info, // convert to dbg.values before writing out. bool IsNewDbgInfoFormat = M.IsNewDbgInfoFormat; if (IsNewDbgInfoFormat) @@ -56,8 +56,8 @@ namespace { StringRef getPassName() const override { return "Bitcode Writer"; } bool runOnModule(Module &M) override { - // RemoveDIs: there's no bitcode representation of the DPValue debug-info, - // convert to dbg.values before writing out. + // RemoveDIs: there's no bitcode representation of the DbgRecord + // debug-info, convert to dbg.values before writing out. 
bool IsNewDbgInfoFormat = M.IsNewDbgInfoFormat; if (IsNewDbgInfoFormat) M.convertFromNewDbgValues(); diff --git a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp index 55cdc3c..2e8e7d0 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp @@ -369,7 +369,8 @@ void AppleAccelTableWriter::emit() const { DWARF5AccelTableData::DWARF5AccelTableData(const DIE &Die, const uint32_t UnitID, const bool IsTU) - : OffsetVal(&Die), DieTag(Die.getTag()), IsTU(IsTU), UnitID(UnitID) {} + : OffsetVal(&Die), DieTag(Die.getTag()), AbbrevNumber(0), IsTU(IsTU), + UnitID(UnitID) {} void Dwarf5AccelTableWriter::Header::emit(Dwarf5AccelTableWriter &Ctx) { assert(CompUnitCount > 0 && "Index must have at least one CU."); diff --git a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp index a4b819a..746926e56 100644 --- a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp +++ b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp @@ -217,13 +217,14 @@ void FunctionVarLocs::init(FunctionVarLocsBuilder &Builder) { // to the start and end position in the vector with VarLocsBeforeInst. This // block includes VarLocs for any DPValues attached to that instruction. for (auto &P : Builder.VarLocsBeforeInst) { - // Process VarLocs attached to a DPValue alongside their marker Instruction. + // Process VarLocs attached to a DbgRecord alongside their marker + // Instruction. if (isa<const DbgRecord *>(P.first)) continue; const Instruction *I = cast<const Instruction *>(P.first); unsigned BlockStart = VarLocRecords.size(); - // Any VarLocInfos attached to a DPValue should now be remapped to their - // marker Instruction, in order of DPValue appearance and prior to any + // Any VarLocInfos attached to a DbgRecord should now be remapped to their + // marker Instruction, in order of DbgRecord appearance and prior to any // VarLocInfos attached directly to that instruction. 
for (const DPValue &DPV : DPValue::filter(I->getDbgRecordRange())) { // Even though DPV defines a variable location, VarLocsBeforeInst can @@ -1649,7 +1650,7 @@ void AssignmentTrackingLowering::processUntaggedInstruction( Ops.push_back(dwarf::DW_OP_deref); DIE = DIExpression::prependOpcodes(DIE, Ops, /*StackValue=*/false, /*EntryValue=*/false); - // Find a suitable insert point, before the next instruction or DPValue + // Find a suitable insert point, before the next instruction or DbgRecord // after I. auto InsertBefore = getNextNode(&I); assert(InsertBefore && "Shouldn't be inserting after a terminator"); @@ -1886,21 +1887,21 @@ void AssignmentTrackingLowering::resetInsertionPoint(DPValue &After) { } void AssignmentTrackingLowering::process(BasicBlock &BB, BlockInfo *LiveSet) { - // If the block starts with DPValues, we need to process those DPValues as + // If the block starts with DbgRecords, we need to process those DbgRecords as // their own frame without processing any instructions first. - bool ProcessedLeadingDPValues = !BB.begin()->hasDbgRecords(); + bool ProcessedLeadingDbgRecords = !BB.begin()->hasDbgRecords(); for (auto II = BB.begin(), EI = BB.end(); II != EI;) { assert(VarsTouchedThisFrame.empty()); // Process the instructions in "frames". A "frame" includes a single // non-debug instruction followed any debug instructions before the // next non-debug instruction. - // Skip the current instruction if it has unprocessed DPValues attached (see - // comment above `ProcessedLeadingDPValues`). - if (ProcessedLeadingDPValues) { + // Skip the current instruction if it has unprocessed DbgRecords attached + // (see comment above `ProcessedLeadingDbgRecords`). + if (ProcessedLeadingDbgRecords) { // II is now either a debug intrinsic, a non-debug instruction with no - // attached DPValues, or a non-debug instruction with attached processed - // DPValues. + // attached DbgRecords, or a non-debug instruction with attached processed + // DbgRecords. 
// II has not been processed. if (!isa<DbgInfoIntrinsic>(&*II)) { if (II->isTerminator()) @@ -1912,8 +1913,8 @@ void AssignmentTrackingLowering::process(BasicBlock &BB, BlockInfo *LiveSet) { } } // II is now either a debug intrinsic, a non-debug instruction with no - // attached DPValues, or a non-debug instruction with attached unprocessed - // DPValues. + // attached DbgRecords, or a non-debug instruction with attached unprocessed + // DbgRecords. if (II != EI && II->hasDbgRecords()) { // Skip over non-variable debug records (i.e., labels). They're going to // be read from IR (possibly re-ordering them within the debug record @@ -1924,7 +1925,7 @@ void AssignmentTrackingLowering::process(BasicBlock &BB, BlockInfo *LiveSet) { assert(LiveSet->isValid()); } } - ProcessedLeadingDPValues = true; + ProcessedLeadingDbgRecords = true; while (II != EI) { auto *Dbg = dyn_cast<DbgInfoIntrinsic>(&*II); if (!Dbg) @@ -1934,9 +1935,9 @@ void AssignmentTrackingLowering::process(BasicBlock &BB, BlockInfo *LiveSet) { assert(LiveSet->isValid()); ++II; } - // II is now a non-debug instruction either with no attached DPValues, or - // with attached processed DPValues. II has not been processed, and all - // debug instructions or DPValues in the frame preceding II have been + // II is now a non-debug instruction either with no attached DbgRecords, or + // with attached processed DbgRecords. II has not been processed, and all + // debug instructions or DbgRecords in the frame preceding II have been // processed. // We've processed everything in the "frame". 
Now determine which variables diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 59a0c64..055e275 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -2946,7 +2946,7 @@ class TypePromotionTransaction { Instruction *PrevInst; BasicBlock *BB; } Point; - std::optional<DPValue::self_iterator> BeforeDPValue = std::nullopt; + std::optional<DbgRecord::self_iterator> BeforeDbgRecord = std::nullopt; /// Remember whether or not the instruction had a previous instruction. bool HasPrevInstruction; @@ -2958,9 +2958,9 @@ class TypePromotionTransaction { BasicBlock *BB = Inst->getParent(); // Record where we would have to re-insert the instruction in the sequence - // of DPValues, if we ended up reinserting. + // of DbgRecords, if we ended up reinserting. if (BB->IsNewDbgInfoFormat) - BeforeDPValue = Inst->getDbgReinsertionPosition(); + BeforeDbgRecord = Inst->getDbgReinsertionPosition(); if (HasPrevInstruction) { Point.PrevInst = &*std::prev(Inst->getIterator()); @@ -2983,7 +2983,7 @@ class TypePromotionTransaction { Inst->insertBefore(*Point.BB, Position); } - Inst->getParent()->reinsertInstInDbgRecords(Inst, BeforeDPValue); + Inst->getParent()->reinsertInstInDbgRecords(Inst, BeforeDbgRecord); } }; diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp index 4ed44d1..8efe67a 100644 --- a/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/llvm/lib/CodeGen/MIRPrinter.cpp @@ -982,7 +982,7 @@ void MIRFormatter::printIRValue(raw_ostream &OS, const Value &V, } void llvm::printMIR(raw_ostream &OS, const Module &M) { - // RemoveDIs: as there's no textual form for DPValues yet, print debug-info + // RemoveDIs: as there's no textual form for DbgRecords yet, print debug-info // in dbg.value format. 
bool IsNewDbgInfoFormat = M.IsNewDbgInfoFormat; if (IsNewDbgInfoFormat) @@ -996,7 +996,7 @@ void llvm::printMIR(raw_ostream &OS, const Module &M) { } void llvm::printMIR(raw_ostream &OS, const MachineFunction &MF) { - // RemoveDIs: as there's no textual form for DPValues yet, print debug-info + // RemoveDIs: as there's no textual form for DbgRecords yet, print debug-info // in dbg.value format. bool IsNewDbgInfoFormat = MF.getFunction().IsNewDbgInfoFormat; if (IsNewDbgInfoFormat) diff --git a/llvm/lib/CodeGen/SelectOptimize.cpp b/llvm/lib/CodeGen/SelectOptimize.cpp index 40898d2..f65d532 100644 --- a/llvm/lib/CodeGen/SelectOptimize.cpp +++ b/llvm/lib/CodeGen/SelectOptimize.cpp @@ -645,12 +645,13 @@ void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { DI->moveBeforePreserving(&*EndBlock->getFirstInsertionPt()); } - // Duplicate implementation for DPValues, the non-instruction debug-info - // record. Helper lambda for moving DPValues to the end block. - auto TransferDPValues = [&](Instruction &I) { - for (auto &DPValue : llvm::make_early_inc_range(I.getDbgRecordRange())) { - DPValue.removeFromParent(); - EndBlock->insertDbgRecordBefore(&DPValue, + // Duplicate implementation for DbgRecords, the non-instruction debug-info + // format. Helper lambda for moving DbgRecords to the end block. + auto TransferDbgRecords = [&](Instruction &I) { + for (auto &DbgRecord : + llvm::make_early_inc_range(I.getDbgRecordRange())) { + DbgRecord.removeFromParent(); + EndBlock->insertDbgRecordBefore(&DbgRecord, EndBlock->getFirstInsertionPt()); } }; @@ -660,7 +661,7 @@ void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { // middle" of the select group. auto R = make_range(std::next(SI.getI()->getIterator()), std::next(LastSI.getI()->getIterator())); - llvm::for_each(R, TransferDPValues); + llvm::for_each(R, TransferDbgRecords); // These are the new basic blocks for the conditional branch. 
// At least one will become an actual new basic block. diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 735cec8..40b078a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2799,8 +2799,7 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) { // Limit this to after legalization if the add has wrap flags (Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() && !N->getFlags().hasNoSignedWrap()))) { - SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0), - DAG.getAllOnesConstant(DL, VT)); + SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT); return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not); } } @@ -3025,8 +3024,7 @@ SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1, // Limit this to after legalization if the add has wrap flags (Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() && !N0->getFlags().hasNoSignedWrap()))) { - SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0), - DAG.getAllOnesConstant(DL, VT)); + SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT); return DAG.getNode(ISD::SUB, DL, VT, N1, Not); } @@ -3789,63 +3787,34 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1)); } - // fold ((A+(B+or-C))-B) -> A+or-C - if (N0.getOpcode() == ISD::ADD && - (N0.getOperand(1).getOpcode() == ISD::SUB || - N0.getOperand(1).getOpcode() == ISD::ADD) && - N0.getOperand(1).getOperand(0) == N1) - return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0), - N0.getOperand(1).getOperand(1)); - - // fold ((A+(C+B))-B) -> A+C - if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD && - N0.getOperand(1).getOperand(1) == N1) - return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), - N0.getOperand(1).getOperand(0)); + SDValue A, B, C; + + // fold ((A+(B+C))-B) -> A+C + if (sd_match(N0, m_Add(m_Value(A), 
m_Add(m_Specific(N1), m_Value(C))))) + return DAG.getNode(ISD::ADD, DL, VT, A, C); + + // fold ((A+(B-C))-B) -> A-C + if (sd_match(N0, m_Add(m_Value(A), m_Sub(m_Specific(N1), m_Value(C))))) + return DAG.getNode(ISD::SUB, DL, VT, A, C); // fold ((A-(B-C))-C) -> A-B - if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB && - N0.getOperand(1).getOperand(1) == N1) - return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), - N0.getOperand(1).getOperand(0)); + if (sd_match(N0, m_Sub(m_Value(A), m_Sub(m_Value(B), m_Specific(N1))))) + return DAG.getNode(ISD::SUB, DL, VT, A, B); // fold (A-(B-C)) -> A+(C-B) - if (N1.getOpcode() == ISD::SUB && N1.hasOneUse()) + if (sd_match(N1, m_OneUse(m_Sub(m_Value(B), m_Value(C))))) return DAG.getNode(ISD::ADD, DL, VT, N0, - DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1), - N1.getOperand(0))); + DAG.getNode(ISD::SUB, DL, VT, C, B)); // A - (A & B) -> A & (~B) - if (N1.getOpcode() == ISD::AND) { - SDValue A = N1.getOperand(0); - SDValue B = N1.getOperand(1); - if (A != N0) - std::swap(A, B); - if (A == N0 && - (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) { - SDValue InvB = - DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT)); - return DAG.getNode(ISD::AND, DL, VT, A, InvB); - } - } + if (sd_match(N1, m_And(m_Specific(N0), m_Value(B))) && + (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) + return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getNOT(DL, B, VT)); - // fold (X - (-Y * Z)) -> (X + (Y * Z)) - if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) { - if (N1.getOperand(0).getOpcode() == ISD::SUB && - isNullOrNullSplat(N1.getOperand(0).getOperand(0))) { - SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, - N1.getOperand(0).getOperand(1), - N1.getOperand(1)); - return DAG.getNode(ISD::ADD, DL, VT, N0, Mul); - } - if (N1.getOperand(1).getOpcode() == ISD::SUB && - isNullOrNullSplat(N1.getOperand(1).getOperand(0))) { - SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, - 
N1.getOperand(0), - N1.getOperand(1).getOperand(1)); - return DAG.getNode(ISD::ADD, DL, VT, N0, Mul); - } - } + // fold (A - (-B * C)) -> (A + (B * C)) + if (sd_match(N1, m_OneUse(m_Mul(m_Sub(m_Zero(), m_Value(B)), m_Value(C))))) + return DAG.getNode(ISD::ADD, DL, VT, N0, + DAG.getNode(ISD::MUL, DL, VT, B, C)); // If either operand of a sub is undef, the result is undef if (N0.isUndef()) @@ -3865,12 +3834,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (SDValue V = foldSubToUSubSat(VT, N)) return V; - // (x - y) - 1 -> add (xor y, -1), x - if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() && isOneOrOneSplat(N1)) { - SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), - DAG.getAllOnesConstant(DL, VT)); - return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0)); - } + // (A - B) - 1 -> add (xor B, -1), A + if (sd_match(N, m_Sub(m_OneUse(m_Sub(m_Value(A), m_Value(B))), m_One()))) + return DAG.getNode(ISD::ADD, DL, VT, A, DAG.getNOT(DL, B, VT)); // Look for: // sub y, (xor x, -1) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index c2430359..b8c7d08 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -37,6 +37,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/CodeGen/SDPatternMatch.h" #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" @@ -81,6 +82,7 @@ #include <vector> using namespace llvm; +using namespace llvm::SDPatternMatch; /// makeVTList - Return an instance of the SDVTList struct initialized with the /// specified members. 
@@ -4290,21 +4292,15 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth) const { return isKnownToBeAPowerOfTwo(Val.getOperand(2), Depth + 1) && isKnownToBeAPowerOfTwo(Val.getOperand(1), Depth + 1); - if (Val.getOpcode() == ISD::AND) { - // Looking for `x & -x` pattern: - // If x == 0: - // x & -x -> 0 - // If x != 0: - // x & -x -> non-zero pow2 - // so if we find the pattern return whether we know `x` is non-zero. - for (unsigned OpIdx = 0; OpIdx < 2; ++OpIdx) { - SDValue NegOp = Val.getOperand(OpIdx); - if (NegOp.getOpcode() == ISD::SUB && - NegOp.getOperand(1) == Val.getOperand(1 - OpIdx) && - isNullOrNullSplat(NegOp.getOperand(0))) - return isKnownNeverZero(Val.getOperand(1 - OpIdx), Depth); - } - } + // Looking for `x & -x` pattern: + // If x == 0: + // x & -x -> 0 + // If x != 0: + // x & -x -> non-zero pow2 + // so if we find the pattern return whether we know `x` is non-zero. + SDValue X; + if (sd_match(Val, m_And(m_Value(X), m_Sub(m_Zero(), m_Deferred(X))))) + return isKnownNeverZero(X, Depth); if (Val.getOpcode() == ISD::ZERO_EXTEND) return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1); diff --git a/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/llvm/lib/CodeGen/SjLjEHPrepare.cpp index 515b576..4bad57d 100644 --- a/llvm/lib/CodeGen/SjLjEHPrepare.cpp +++ b/llvm/lib/CodeGen/SjLjEHPrepare.cpp @@ -150,9 +150,7 @@ static void MarkBlocksLiveIn(BasicBlock *BB, if (!LiveBBs.insert(BB).second) return; // already been here. 
- df_iterator_default_set<BasicBlock*> Visited; - - for (BasicBlock *B : inverse_depth_first_ext(BB, Visited)) + for (BasicBlock *B : inverse_depth_first(BB)) LiveBBs.insert(B); } diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderVTune.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderVTune.cpp index d346214..57ac991e 100644 --- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderVTune.cpp +++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderVTune.cpp @@ -87,7 +87,7 @@ static void registerJITLoaderVTuneUnregisterImpl( for (auto &Method : UM) { JITEventWrapper::Wrapper->iJIT_NotifyEvent( iJVM_EVENT_TYPE_METHOD_UNLOAD_START, - const_cast<unsigned long *>(&Method.first)); + const_cast<uint64_t *>(&Method.first)); } } diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index d65ed8c1..c74c898 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -2110,7 +2110,7 @@ Function *getFreshReductionFunc(Module &M) { OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductions( const LocationDescription &Loc, InsertPointTy AllocaIP, - ArrayRef<ReductionInfo> ReductionInfos, bool IsNoWait) { + ArrayRef<ReductionInfo> ReductionInfos, bool IsNoWait, bool IsByRef) { for (const ReductionInfo &RI : ReductionInfos) { (void)RI; assert(RI.Variable && "expected non-null variable"); @@ -2197,17 +2197,29 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductions( for (auto En : enumerate(ReductionInfos)) { const ReductionInfo &RI = En.value(); Type *ValueType = RI.ElementType; - Value *RedValue = Builder.CreateLoad(ValueType, RI.Variable, - "red.value." + Twine(En.index())); + // We have one less load for by-ref case because that load is now inside of + // the reduction region + Value *RedValue = nullptr; + if (!IsByRef) { + RedValue = Builder.CreateLoad(ValueType, RI.Variable, + "red.value." 
+ Twine(En.index())); + } Value *PrivateRedValue = Builder.CreateLoad(ValueType, RI.PrivateVariable, "red.private.value." + Twine(En.index())); Value *Reduced; - Builder.restoreIP( - RI.ReductionGen(Builder.saveIP(), RedValue, PrivateRedValue, Reduced)); + if (IsByRef) { + Builder.restoreIP(RI.ReductionGen(Builder.saveIP(), RI.Variable, + PrivateRedValue, Reduced)); + } else { + Builder.restoreIP(RI.ReductionGen(Builder.saveIP(), RedValue, + PrivateRedValue, Reduced)); + } if (!Builder.GetInsertBlock()) return InsertPointTy(); - Builder.CreateStore(Reduced, RI.Variable); + // for by-ref case, the store is inside of the reduction region + if (!IsByRef) + Builder.CreateStore(Reduced, RI.Variable); } Function *EndReduceFunc = getOrCreateRuntimeFunctionPtr( IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait @@ -2219,7 +2231,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductions( // function. There are no loads/stores here because they will be happening // inside the atomic elementwise reduction. 
Builder.SetInsertPoint(AtomicRedBlock); - if (CanGenerateAtomic) { + if (CanGenerateAtomic && !IsByRef) { for (const ReductionInfo &RI : ReductionInfos) { Builder.restoreIP(RI.AtomicReductionGen(Builder.saveIP(), RI.ElementType, RI.Variable, RI.PrivateVariable)); @@ -2257,7 +2269,9 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductions( Builder.restoreIP(RI.ReductionGen(Builder.saveIP(), LHS, RHS, Reduced)); if (!Builder.GetInsertBlock()) return InsertPointTy(); - Builder.CreateStore(Reduced, LHSPtr); + // store is inside of the reduction region when using by-ref + if (!IsByRef) + Builder.CreateStore(Reduced, LHSPtr); } Builder.CreateRetVoid(); diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index 1beb4c0..11383ea 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -4592,7 +4592,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) { void AssemblyWriter::printDPMarker(const DPMarker &Marker) { // There's no formal representation of a DPMarker -- print purely as a // debugging aid. - for (const DbgRecord &DPR : Marker.StoredDPValues) { + for (const DbgRecord &DPR : Marker.StoredDbgRecords) { printDbgRecord(DPR); Out << "\n"; } diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp index 7ead7ce..4dd1bdd 100644 --- a/llvm/lib/IR/BasicBlock.cpp +++ b/llvm/lib/IR/BasicBlock.cpp @@ -63,9 +63,9 @@ DPMarker *BasicBlock::createMarker(InstListType::iterator It) { void BasicBlock::convertToNewDbgValues() { IsNewDbgInfoFormat = true; - // Iterate over all instructions in the instruction list, collecting dbg.value - // instructions and converting them to DPValues. Once we find a "real" - // instruction, attach all those DPValues to a DPMarker in that instruction. + // Iterate over all instructions in the instruction list, collecting debug + // info intrinsics and converting them to DbgRecords. Once we find a "real" + // instruction, attach all those DbgRecords to a DPMarker in that instruction. 
SmallVector<DbgRecord *, 4> DPVals; for (Instruction &I : make_early_inc_range(InstList)) { assert(!I.DbgMarker && "DbgMarker already set on old-format instrs?"); @@ -86,7 +86,7 @@ void BasicBlock::convertToNewDbgValues() { if (DPVals.empty()) continue; - // Create a marker to store DPValues in. + // Create a marker to store DbgRecords in. createMarker(&I); DPMarker *Marker = I.DbgMarker; @@ -102,7 +102,7 @@ void BasicBlock::convertFromNewDbgValues() { IsNewDbgInfoFormat = false; // Iterate over the block, finding instructions annotated with DPMarkers. - // Convert any attached DPValues to dbg.values and insert ahead of the + // Convert any attached DbgRecords to debug intrinsics and insert ahead of the // instruction. for (auto &Inst : *this) { if (!Inst.DbgMarker) @@ -116,7 +116,7 @@ void BasicBlock::convertFromNewDbgValues() { Marker.eraseFromParent(); } - // Assume no trailing DPValues: we could technically create them at the end + // Assume no trailing DbgRecords: we could technically create them at the end // of the block, after a terminator, but this would be non-cannonical and // indicates that something else is broken somewhere. assert(!getTrailingDbgRecords()); @@ -691,15 +691,15 @@ void BasicBlock::renumberInstructions() { NumInstrRenumberings++; } -void BasicBlock::flushTerminatorDbgValues() { - // If we erase the terminator in a block, any DPValues will sink and "fall +void BasicBlock::flushTerminatorDbgRecords() { + // If we erase the terminator in a block, any DbgRecords will sink and "fall // off the end", existing after any terminator that gets inserted. With // dbg.value intrinsics we would just insert the terminator at end() and - // the dbg.values would come before the terminator. With DPValues, we must + // the dbg.values would come before the terminator. With DbgRecords, we must // do this manually. 
// To get out of this unfortunate form, whenever we insert a terminator, - // check whether there's anything trailing at the end and move those DPValues - // in front of the terminator. + // check whether there's anything trailing at the end and move those + // DbgRecords in front of the terminator. // Do nothing if we're not in new debug-info format. if (!IsNewDbgInfoFormat) @@ -710,15 +710,15 @@ void BasicBlock::flushTerminatorDbgValues() { if (!Term) return; - // Are there any dangling DPValues? - DPMarker *TrailingDPValues = getTrailingDbgRecords(); - if (!TrailingDPValues) + // Are there any dangling DbgRecords? + DPMarker *TrailingDbgRecords = getTrailingDbgRecords(); + if (!TrailingDbgRecords) return; - // Transfer DPValues from the trailing position onto the terminator. + // Transfer DbgRecords from the trailing position onto the terminator. createMarker(Term); - Term->DbgMarker->absorbDebugValues(*TrailingDPValues, false); - TrailingDPValues->eraseFromParent(); + Term->DbgMarker->absorbDebugValues(*TrailingDbgRecords, false); + TrailingDbgRecords->eraseFromParent(); deleteTrailingDbgRecords(); } @@ -735,7 +735,7 @@ void BasicBlock::spliceDebugInfoEmptyBlock(BasicBlock::iterator Dest, // If an optimisation pass attempts to splice the contents of the block from // BB1->begin() to BB1->getTerminator(), then the dbg.value will be // transferred to the destination. - // However, in the "new" DPValue format for debug-info, that range is empty: + // However, in the "new" DbgRecord format for debug-info, that range is empty: // begin() returns an iterator to the terminator, as there will only be a // single instruction in the block. 
We must piece together from the bits set // in the iterators whether there was the intention to transfer any debug @@ -750,16 +750,16 @@ void BasicBlock::spliceDebugInfoEmptyBlock(BasicBlock::iterator Dest, bool ReadFromHead = First.getHeadBit(); // If the source block is completely empty, including no terminator, then - // transfer any trailing DPValues that are still hanging around. This can + // transfer any trailing DbgRecords that are still hanging around. This can // occur when a block is optimised away and the terminator has been moved // somewhere else. if (Src->empty()) { - DPMarker *SrcTrailingDPValues = Src->getTrailingDbgRecords(); - if (!SrcTrailingDPValues) + DPMarker *SrcTrailingDbgRecords = Src->getTrailingDbgRecords(); + if (!SrcTrailingDbgRecords) return; Dest->adoptDbgRecords(Src, Src->end(), InsertAtHead); - // adoptDbgRecords should have released the trailing DPValues. + // adoptDbgRecords should have released the trailing DbgRecords. assert(!Src->getTrailingDbgRecords()); return; } @@ -785,8 +785,8 @@ void BasicBlock::spliceDebugInfo(BasicBlock::iterator Dest, BasicBlock *Src, /* Do a quick normalisation before calling the real splice implementation. We might be operating on a degenerate basic block that has no instructions in it, a legitimate transient state. In that case, Dest will be end() and - any DPValues temporarily stored in the TrailingDPValues map in LLVMContext. - We might illustrate it thus: + any DbgRecords temporarily stored in the TrailingDbgRecords map in + LLVMContext. We might illustrate it thus: Dest | @@ -795,35 +795,35 @@ void BasicBlock::spliceDebugInfo(BasicBlock::iterator Dest, BasicBlock *Src, | | First Last - However: does the caller expect the "~" DPValues to end up before or after - the spliced segment? This is communciated in the "Head" bit of Dest, which - signals whether the caller called begin() or end() on this block. 
+ However: does the caller expect the "~" DbgRecords to end up before or + after the spliced segment? This is communciated in the "Head" bit of Dest, + which signals whether the caller called begin() or end() on this block. - If the head bit is set, then all is well, we leave DPValues trailing just + If the head bit is set, then all is well, we leave DbgRecords trailing just like how dbg.value instructions would trail after instructions spliced to the beginning of this block. - If the head bit isn't set, then try to jam the "~" DPValues onto the front - of the First instruction, then splice like normal, which joins the "~" - DPValues with the "+" DPValues. However if the "+" DPValues are supposed to - be left behind in Src, then: - * detach the "+" DPValues, - * move the "~" DPValues onto First, + If the head bit isn't set, then try to jam the "~" DbgRecords onto the + front of the First instruction, then splice like normal, which joins the + "~" DbgRecords with the "+" DbgRecords. However if the "+" DbgRecords are + supposed to be left behind in Src, then: + * detach the "+" DbgRecords, + * move the "~" DbgRecords onto First, * splice like normal, - * replace the "+" DPValues onto the Last position. + * replace the "+" DbgRecords onto the Last position. Complicated, but gets the job done. */ - // If we're inserting at end(), and not in front of dangling DPValues, then - // move the DPValues onto "First". They'll then be moved naturally in the + // If we're inserting at end(), and not in front of dangling DbgRecords, then + // move the DbgRecords onto "First". They'll then be moved naturally in the // splice process. - DPMarker *MoreDanglingDPValues = nullptr; - DPMarker *OurTrailingDPValues = getTrailingDbgRecords(); - if (Dest == end() && !Dest.getHeadBit() && OurTrailingDPValues) { - // Are the "+" DPValues not supposed to move? 
If so, detach them + DPMarker *MoreDanglingDbgRecords = nullptr; + DPMarker *OurTrailingDbgRecords = getTrailingDbgRecords(); + if (Dest == end() && !Dest.getHeadBit() && OurTrailingDbgRecords) { + // Are the "+" DbgRecords not supposed to move? If so, detach them // temporarily. if (!First.getHeadBit() && First->hasDbgRecords()) { - MoreDanglingDPValues = Src->getMarker(First); - MoreDanglingDPValues->removeFromParent(); + MoreDanglingDbgRecords = Src->getMarker(First); + MoreDanglingDbgRecords->removeFromParent(); } if (First->hasDbgRecords()) { @@ -839,8 +839,8 @@ void BasicBlock::spliceDebugInfo(BasicBlock::iterator Dest, BasicBlock *Src, // No current marker, create one and absorb in. (FIXME: we can avoid an // allocation in the future). DPMarker *CurMarker = Src->createMarker(&*First); - CurMarker->absorbDebugValues(*OurTrailingDPValues, false); - OurTrailingDPValues->eraseFromParent(); + CurMarker->absorbDebugValues(*OurTrailingDbgRecords, false); + OurTrailingDbgRecords->eraseFromParent(); } deleteTrailingDbgRecords(); First.setHeadBit(true); @@ -849,16 +849,16 @@ void BasicBlock::spliceDebugInfo(BasicBlock::iterator Dest, BasicBlock *Src, // Call the main debug-info-splicing implementation. spliceDebugInfoImpl(Dest, Src, First, Last); - // Do we have some "+" DPValues hanging around that weren't supposed to move, - // and we detached to make things easier? - if (!MoreDanglingDPValues) + // Do we have some "+" DbgRecords hanging around that weren't supposed to + // move, and we detached to make things easier? + if (!MoreDanglingDbgRecords) return; // FIXME: we could avoid an allocation here sometimes. (adoptDbgRecords // requires an iterator). 
DPMarker *LastMarker = Src->createMarker(Last); - LastMarker->absorbDebugValues(*MoreDanglingDPValues, true); - MoreDanglingDPValues->eraseFromParent(); + LastMarker->absorbDebugValues(*MoreDanglingDbgRecords, true); + MoreDanglingDbgRecords->eraseFromParent(); } void BasicBlock::spliceDebugInfoImpl(BasicBlock::iterator Dest, BasicBlock *Src, @@ -870,15 +870,16 @@ void BasicBlock::spliceDebugInfoImpl(BasicBlock::iterator Dest, BasicBlock *Src, bool InsertAtHead = Dest.getHeadBit(); bool ReadFromHead = First.getHeadBit(); // Use this flag to signal the abnormal case, where we don't want to copy the - // DPValues ahead of the "Last" position. + // DbgRecords ahead of the "Last" position. bool ReadFromTail = !Last.getTailBit(); bool LastIsEnd = (Last == Src->end()); /* Here's an illustration of what we're about to do. We have two blocks, this and Src, and two segments of list. Each instruction is marked by a capital - while potential DPValue debug-info is marked out by "-" characters and a few - other special characters (+:=) where I want to highlight what's going on. + while potential DbgRecord debug-info is marked out by "-" characters and a + few other special characters (+:=) where I want to highlight what's going + on. Dest | @@ -889,18 +890,18 @@ void BasicBlock::spliceDebugInfoImpl(BasicBlock::iterator Dest, BasicBlock *Src, The splice method is going to take all the instructions from First up to (but not including) Last and insert them in _front_ of Dest, forming one - long list. All the DPValues attached to instructions _between_ First and + long list. All the DbgRecords attached to instructions _between_ First and Last need no maintenence. However, we have to do special things with the - DPValues marked with the +:= characters. We only have three positions: - should the "+" DPValues be transferred, and if so to where? Do we move the - ":" DPValues? Would they go in front of the "=" DPValues, or should the "=" - DPValues go before "+" DPValues? 
+ DbgRecords marked with the +:= characters. We only have three positions: + should the "+" DbgRecords be transferred, and if so to where? Do we move the + ":" DbgRecords? Would they go in front of the "=" DbgRecords, or should the + "=" DbgRecords go before "+" DbgRecords? We're told which way it should be by the bits carried in the iterators. The "Head" bit indicates whether the specified position is supposed to be at the - front of the attached DPValues (true) or not (false). The Tail bit is true - on the other end of a range: is the range intended to include DPValues up to - the end (false) or not (true). + front of the attached DbgRecords (true) or not (false). The Tail bit is true + on the other end of a range: is the range intended to include DbgRecords up + to the end (false) or not (true). FIXME: the tail bit doesn't need to be distinct from the head bit, we could combine them. @@ -934,15 +935,16 @@ void BasicBlock::spliceDebugInfoImpl(BasicBlock::iterator Dest, BasicBlock *Src, */ - // Detach the marker at Dest -- this lets us move the "====" DPValues around. + // Detach the marker at Dest -- this lets us move the "====" DbgRecords + // around. DPMarker *DestMarker = nullptr; if (Dest != end()) { if ((DestMarker = getMarker(Dest))) DestMarker->removeFromParent(); } - // If we're moving the tail range of DPValues (":::"), absorb them into the - // front of the DPValues at Dest. + // If we're moving the tail range of DbgRecords (":::"), absorb them into the + // front of the DbgRecords at Dest. if (ReadFromTail && Src->getMarker(Last)) { DPMarker *FromLast = Src->getMarker(Last); if (LastIsEnd) { @@ -956,7 +958,7 @@ void BasicBlock::spliceDebugInfoImpl(BasicBlock::iterator Dest, BasicBlock *Src, } } - // If we're _not_ reading from the head of First, i.e. the "++++" DPValues, + // If we're _not_ reading from the head of First, i.e. the "++++" DbgRecords, // move their markers onto Last. They remain in the Src block. No action // needed. 
if (!ReadFromHead && First->hasDbgRecords()) { @@ -970,16 +972,16 @@ void BasicBlock::spliceDebugInfoImpl(BasicBlock::iterator Dest, BasicBlock *Src, } } - // Finally, do something with the "====" DPValues we detached. + // Finally, do something with the "====" DbgRecords we detached. if (DestMarker) { if (InsertAtHead) { - // Insert them at the end of the DPValues at Dest. The "::::" DPValues + // Insert them at the end of the DbgRecords at Dest. The "::::" DbgRecords // might be in front of them. DPMarker *NewDestMarker = createMarker(Dest); NewDestMarker->absorbDebugValues(*DestMarker, false); } else { // Insert them right at the start of the range we moved, ahead of First - // and the "++++" DPValues. + // and the "++++" DbgRecords. DPMarker *FirstMarker = createMarker(First); FirstMarker->absorbDebugValues(*DestMarker, true); } @@ -990,10 +992,10 @@ void BasicBlock::spliceDebugInfoImpl(BasicBlock::iterator Dest, BasicBlock *Src, // any trailing debug-info at the end of the block would "normally" have // been pushed in front of "First". Move it there now. DPMarker *FirstMarker = getMarker(First); - DPMarker *TrailingDPValues = getTrailingDbgRecords(); - if (TrailingDPValues) { - FirstMarker->absorbDebugValues(*TrailingDPValues, true); - TrailingDPValues->eraseFromParent(); + DPMarker *TrailingDbgRecords = getTrailingDbgRecords(); + if (TrailingDbgRecords) { + FirstMarker->absorbDebugValues(*TrailingDbgRecords, true); + TrailingDbgRecords->eraseFromParent(); deleteTrailingDbgRecords(); } } @@ -1024,7 +1026,7 @@ void BasicBlock::splice(iterator Dest, BasicBlock *Src, iterator First, // And move the instructions. 
getInstList().splice(Dest, Src->getInstList(), First, Last); - flushTerminatorDbgValues(); + flushTerminatorDbgRecords(); } void BasicBlock::insertDbgRecordAfter(DbgRecord *DPV, Instruction *I) { @@ -1057,38 +1059,40 @@ DPMarker *BasicBlock::getMarker(InstListType::iterator It) { } void BasicBlock::reinsertInstInDbgRecords( - Instruction *I, std::optional<DPValue::self_iterator> Pos) { + Instruction *I, std::optional<DbgRecord::self_iterator> Pos) { // "I" was originally removed from a position where it was - // immediately in front of Pos. Any DPValues on that position then "fell down" - // onto Pos. "I" has been re-inserted at the front of that wedge of DPValues, - // shuffle them around to represent the original positioning. To illustrate: + // immediately in front of Pos. Any DbgRecords on that position then "fell + // down" onto Pos. "I" has been re-inserted at the front of that wedge of + // DbgRecords, shuffle them around to represent the original positioning. To + // illustrate: // // Instructions: I1---I---I0 - // DPValues: DDD DDD + // DbgRecords: DDD DDD // // Instruction "I" removed, // // Instructions: I1------I0 - // DPValues: DDDDDD + // DbgRecords: DDDDDD // ^Pos // // Instruction "I" re-inserted (now): // // Instructions: I1---I------I0 - // DPValues: DDDDDD + // DbgRecords: DDDDDD // ^Pos // // After this method completes: // // Instructions: I1---I---I0 - // DPValues: DDD DDD + // DbgRecords: DDD DDD - // This happens if there were no DPValues on I0. Are there now DPValues there? + // This happens if there were no DbgRecords on I0. Are there now DbgRecords + // there? if (!Pos) { DPMarker *NextMarker = getNextMarker(I); if (!NextMarker) return; - if (NextMarker->StoredDPValues.empty()) + if (NextMarker->StoredDbgRecords.empty()) return; // There are DPMarkers there now -- they fell down from "I". 
DPMarker *ThisMarker = createMarker(I); @@ -1096,15 +1100,15 @@ void BasicBlock::reinsertInstInDbgRecords( return; } - // Is there even a range of DPValues to move? + // Is there even a range of DbgRecords to move? DPMarker *DPM = (*Pos)->getMarker(); - auto Range = make_range(DPM->StoredDPValues.begin(), (*Pos)); + auto Range = make_range(DPM->StoredDbgRecords.begin(), (*Pos)); if (Range.begin() == Range.end()) return; // Otherwise: splice. DPMarker *ThisMarker = createMarker(I); - assert(ThisMarker->StoredDPValues.empty()); + assert(ThisMarker->StoredDbgRecords.empty()); ThisMarker->absorbDebugValues(Range, *DPM, true); } diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp index e63b1e6..d168950 100644 --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -895,7 +895,7 @@ bool llvm::stripNonLineTableDebugInfo(Module &M) { if (I.hasMetadataOtherThanDebugLoc()) I.setMetadata("heapallocsite", nullptr); - // Strip any DPValues attached. + // Strip any DbgRecords attached. I.dropDbgRecords(); } } diff --git a/llvm/lib/IR/DebugProgramInstruction.cpp b/llvm/lib/IR/DebugProgramInstruction.cpp index 019b00c..f34d3ae 100644 --- a/llvm/lib/IR/DebugProgramInstruction.cpp +++ b/llvm/lib/IR/DebugProgramInstruction.cpp @@ -1,4 +1,4 @@ -//======-- DebugProgramInstruction.cpp - Implement DPValues/DPMarkers --======// +//=====-- DebugProgramInstruction.cpp - Implement DbgRecords/DPMarkers --=====// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -541,21 +541,21 @@ void DbgRecord::moveAfter(DbgRecord *MoveAfter) { /////////////////////////////////////////////////////////////////////////////// // An empty, global, DPMarker for the purpose of describing empty ranges of -// DPValues. +// DbgRecords. 
DPMarker DPMarker::EmptyDPMarker; void DPMarker::dropDbgRecords() { - while (!StoredDPValues.empty()) { - auto It = StoredDPValues.begin(); + while (!StoredDbgRecords.empty()) { + auto It = StoredDbgRecords.begin(); DbgRecord *DR = &*It; - StoredDPValues.erase(It); + StoredDbgRecords.erase(It); DR->deleteRecord(); } } void DPMarker::dropOneDbgRecord(DbgRecord *DR) { assert(DR->getMarker() == this); - StoredDPValues.erase(DR->getIterator()); + StoredDbgRecords.erase(DR->getIterator()); DR->deleteRecord(); } @@ -566,15 +566,15 @@ const BasicBlock *DPMarker::getParent() const { BasicBlock *DPMarker::getParent() { return MarkedInstr->getParent(); } void DPMarker::removeMarker() { - // Are there any DPValues in this DPMarker? If not, nothing to preserve. + // Are there any DbgRecords in this DPMarker? If not, nothing to preserve. Instruction *Owner = MarkedInstr; - if (StoredDPValues.empty()) { + if (StoredDbgRecords.empty()) { eraseFromParent(); Owner->DbgMarker = nullptr; return; } - // The attached DPValues need to be preserved; attach them to the next + // The attached DbgRecords need to be preserved; attach them to the next // instruction. If there isn't a next instruction, put them on the // "trailing" list. 
DPMarker *NextMarker = Owner->getParent()->getNextMarker(Owner); @@ -610,15 +610,15 @@ void DPMarker::eraseFromParent() { } iterator_range<DbgRecord::self_iterator> DPMarker::getDbgRecordRange() { - return make_range(StoredDPValues.begin(), StoredDPValues.end()); + return make_range(StoredDbgRecords.begin(), StoredDbgRecords.end()); } iterator_range<DbgRecord::const_self_iterator> DPMarker::getDbgRecordRange() const { - return make_range(StoredDPValues.begin(), StoredDPValues.end()); + return make_range(StoredDbgRecords.begin(), StoredDbgRecords.end()); } void DbgRecord::removeFromParent() { - getMarker()->StoredDPValues.erase(getIterator()); + getMarker()->StoredDbgRecords.erase(getIterator()); Marker = nullptr; } @@ -628,29 +628,29 @@ void DbgRecord::eraseFromParent() { } void DPMarker::insertDbgRecord(DbgRecord *New, bool InsertAtHead) { - auto It = InsertAtHead ? StoredDPValues.begin() : StoredDPValues.end(); - StoredDPValues.insert(It, *New); + auto It = InsertAtHead ? StoredDbgRecords.begin() : StoredDbgRecords.end(); + StoredDbgRecords.insert(It, *New); New->setMarker(this); } void DPMarker::insertDbgRecord(DbgRecord *New, DbgRecord *InsertBefore) { assert(InsertBefore->getMarker() == this && - "DPValue 'InsertBefore' must be contained in this DPMarker!"); - StoredDPValues.insert(InsertBefore->getIterator(), *New); + "DbgRecord 'InsertBefore' must be contained in this DPMarker!"); + StoredDbgRecords.insert(InsertBefore->getIterator(), *New); New->setMarker(this); } void DPMarker::insertDbgRecordAfter(DbgRecord *New, DbgRecord *InsertAfter) { assert(InsertAfter->getMarker() == this && - "DPValue 'InsertAfter' must be contained in this DPMarker!"); - StoredDPValues.insert(++(InsertAfter->getIterator()), *New); + "DbgRecord 'InsertAfter' must be contained in this DPMarker!"); + StoredDbgRecords.insert(++(InsertAfter->getIterator()), *New); New->setMarker(this); } void DPMarker::absorbDebugValues(DPMarker &Src, bool InsertAtHead) { - auto It = InsertAtHead ? 
StoredDPValues.begin() : StoredDPValues.end(); - for (DbgRecord &DPV : Src.StoredDPValues) + auto It = InsertAtHead ? StoredDbgRecords.begin() : StoredDbgRecords.end(); + for (DbgRecord &DPV : Src.StoredDbgRecords) DPV.setMarker(this); - StoredDPValues.splice(It, Src.StoredDPValues); + StoredDbgRecords.splice(It, Src.StoredDbgRecords); } void DPMarker::absorbDebugValues(iterator_range<DbgRecord::self_iterator> Range, @@ -659,45 +659,45 @@ void DPMarker::absorbDebugValues(iterator_range<DbgRecord::self_iterator> Range, DR.setMarker(this); auto InsertPos = - (InsertAtHead) ? StoredDPValues.begin() : StoredDPValues.end(); + (InsertAtHead) ? StoredDbgRecords.begin() : StoredDbgRecords.end(); - StoredDPValues.splice(InsertPos, Src.StoredDPValues, Range.begin(), - Range.end()); + StoredDbgRecords.splice(InsertPos, Src.StoredDbgRecords, Range.begin(), + Range.end()); } iterator_range<simple_ilist<DbgRecord>::iterator> DPMarker::cloneDebugInfoFrom( DPMarker *From, std::optional<simple_ilist<DbgRecord>::iterator> from_here, bool InsertAtHead) { DbgRecord *First = nullptr; - // Work out what range of DPValues to clone: normally all the contents of the - // "From" marker, optionally we can start from the from_here position down to - // end(). + // Work out what range of DbgRecords to clone: normally all the contents of + // the "From" marker, optionally we can start from the from_here position down + // to end(). auto Range = - make_range(From->StoredDPValues.begin(), From->StoredDPValues.end()); + make_range(From->StoredDbgRecords.begin(), From->StoredDbgRecords.end()); if (from_here.has_value()) - Range = make_range(*from_here, From->StoredDPValues.end()); + Range = make_range(*from_here, From->StoredDbgRecords.end()); // Clone each DPValue and insert into StoreDPValues; optionally place them at // the start or the end of the list. - auto Pos = (InsertAtHead) ? StoredDPValues.begin() : StoredDPValues.end(); + auto Pos = (InsertAtHead) ? 
StoredDbgRecords.begin() : StoredDbgRecords.end(); for (DbgRecord &DR : Range) { DbgRecord *New = DR.clone(); New->setMarker(this); - StoredDPValues.insert(Pos, *New); + StoredDbgRecords.insert(Pos, *New); if (!First) First = New; } if (!First) - return {StoredDPValues.end(), StoredDPValues.end()}; + return {StoredDbgRecords.end(), StoredDbgRecords.end()}; if (InsertAtHead) // If InsertAtHead is set, we cloned a range onto the front of of the - // StoredDPValues collection, return that range. - return {StoredDPValues.begin(), Pos}; + // StoredDbgRecords collection, return that range. + return {StoredDbgRecords.begin(), Pos}; else // We inserted a block at the end, return that range. - return {First->getIterator(), StoredDPValues.end()}; + return {First->getIterator(), StoredDbgRecords.end()}; } } // end namespace llvm diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp index e089239..7a677d7 100644 --- a/llvm/lib/IR/Instruction.cpp +++ b/llvm/lib/IR/Instruction.cpp @@ -143,7 +143,7 @@ void Instruction::insertBefore(BasicBlock &BB, return; // We've inserted "this": if InsertAtHead is set then it comes before any - // DPValues attached to InsertPos. But if it's not set, then any DPValues + // DPValues attached to InsertPos. But if it's not set, then any DbgRecords // should now come before "this". bool InsertAtHead = InsertPos.getHeadBit(); if (!InsertAtHead) { @@ -166,10 +166,10 @@ void Instruction::insertBefore(BasicBlock &BB, } // If we're inserting a terminator, check if we need to flush out - // TrailingDPValues. Inserting instructions at the end of an incomplete + // TrailingDbgRecords. Inserting instructions at the end of an incomplete // block is handled by the code block above. 
if (isTerminator()) - getParent()->flushTerminatorDbgValues(); + getParent()->flushTerminatorDbgRecords(); } /// Unlink this instruction from its current basic block and insert it into the @@ -212,12 +212,12 @@ void Instruction::moveBeforeImpl(BasicBlock &BB, InstListType::iterator I, assert(I == BB.end() || I->getParent() == &BB); bool InsertAtHead = I.getHeadBit(); - // If we've been given the "Preserve" flag, then just move the DPValues with + // If we've been given the "Preserve" flag, then just move the DbgRecords with // the instruction, no more special handling needed. if (BB.IsNewDbgInfoFormat && DbgMarker && !Preserve) { if (I != this->getIterator() || InsertAtHead) { // "this" is definitely moving in the list, or it's moving ahead of its - // attached DPValues. Detach any existing DPValues. + // attached DPValues. Detach any existing DbgRecords. handleMarkerRemoval(); } } @@ -229,15 +229,15 @@ void Instruction::moveBeforeImpl(BasicBlock &BB, InstListType::iterator I, if (BB.IsNewDbgInfoFormat && !Preserve) { DPMarker *NextMarker = getParent()->getNextMarker(this); - // If we're inserting at point I, and not in front of the DPValues attached - // there, then we should absorb the DPValues attached to I. + // If we're inserting at point I, and not in front of the DbgRecords + // attached there, then we should absorb the DbgRecords attached to I. if (!InsertAtHead && NextMarker && !NextMarker->empty()) { adoptDbgRecords(&BB, I, false); } } if (isTerminator()) - getParent()->flushTerminatorDbgValues(); + getParent()->flushTerminatorDbgRecords(); } iterator_range<DbgRecord::self_iterator> Instruction::cloneDebugInfoFrom( @@ -263,11 +263,11 @@ Instruction::getDbgReinsertionPosition() { if (!NextMarker) return std::nullopt; - // Are there any DPValues in the next marker? - if (NextMarker->StoredDPValues.empty()) + // Are there any DbgRecords in the next marker? 
+ if (NextMarker->StoredDbgRecords.empty()) return std::nullopt; - return NextMarker->StoredDPValues.begin(); + return NextMarker->StoredDbgRecords.begin(); } bool Instruction::hasDbgRecords() const { return !getDbgRecordRange().empty(); } @@ -275,20 +275,20 @@ bool Instruction::hasDbgRecords() const { return !getDbgRecordRange().empty(); } void Instruction::adoptDbgRecords(BasicBlock *BB, BasicBlock::iterator It, bool InsertAtHead) { DPMarker *SrcMarker = BB->getMarker(It); - auto ReleaseTrailingDPValues = [BB, It, SrcMarker]() { + auto ReleaseTrailingDbgRecords = [BB, It, SrcMarker]() { if (BB->end() == It) { SrcMarker->eraseFromParent(); BB->deleteTrailingDbgRecords(); } }; - if (!SrcMarker || SrcMarker->StoredDPValues.empty()) { - ReleaseTrailingDPValues(); + if (!SrcMarker || SrcMarker->StoredDbgRecords.empty()) { + ReleaseTrailingDbgRecords(); return; } // If we have DPMarkers attached to this instruction, we have to honour the - // ordering of DPValues between this and the other marker. Fall back to just + // ordering of DbgRecords between this and the other marker. Fall back to just // absorbing from the source. if (DbgMarker || It == BB->end()) { // Ensure we _do_ have a marker. @@ -304,10 +304,11 @@ void Instruction::adoptDbgRecords(BasicBlock *BB, BasicBlock::iterator It, // block, it's important to not leave the empty marker trailing. It will // give a misleading impression that some debug records have been left // trailing. - ReleaseTrailingDPValues(); + ReleaseTrailingDbgRecords(); } else { - // Optimisation: we're transferring all the DPValues from the source marker - // onto this empty location: just adopt the other instructions marker. + // Optimisation: we're transferring all the DbgRecords from the source + // marker onto this empty location: just adopt the other instructions + // marker. 
DbgMarker = SrcMarker; DbgMarker->MarkedInstr = this; It->DbgMarker = nullptr; diff --git a/llvm/lib/IR/LLVMContextImpl.cpp b/llvm/lib/IR/LLVMContextImpl.cpp index a0bf9ca..a471314 100644 --- a/llvm/lib/IR/LLVMContextImpl.cpp +++ b/llvm/lib/IR/LLVMContextImpl.cpp @@ -50,7 +50,7 @@ LLVMContextImpl::~LLVMContextImpl() { // when it's terminator was removed were eventually replaced. This assertion // firing indicates that DPValues went missing during the lifetime of the // LLVMContext. - assert(TrailingDPValues.empty() && "DPValue records in blocks not cleaned"); + assert(TrailingDbgRecords.empty() && "DbgRecords in blocks not cleaned"); #endif // NOTE: We need to delete the contents of OwnedModules, but Module's dtor diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h index c841b28..b1dcb26 100644 --- a/llvm/lib/IR/LLVMContextImpl.h +++ b/llvm/lib/IR/LLVMContextImpl.h @@ -1684,19 +1684,19 @@ public: /// such a way. These are stored in LLVMContext because typically LLVM only /// edits a small number of blocks at a time, so there's no need to bloat /// BasicBlock with such a data structure. - SmallDenseMap<BasicBlock *, DPMarker *> TrailingDPValues; + SmallDenseMap<BasicBlock *, DPMarker *> TrailingDbgRecords; - // Set, get and delete operations for TrailingDPValues. + // Set, get and delete operations for TrailingDbgRecords. 
void setTrailingDbgRecords(BasicBlock *B, DPMarker *M) { - assert(!TrailingDPValues.count(B)); - TrailingDPValues[B] = M; + assert(!TrailingDbgRecords.count(B)); + TrailingDbgRecords[B] = M; } DPMarker *getTrailingDbgRecords(BasicBlock *B) { - return TrailingDPValues.lookup(B); + return TrailingDbgRecords.lookup(B); } - void deleteTrailingDbgRecords(BasicBlock *B) { TrailingDPValues.erase(B); } + void deleteTrailingDbgRecords(BasicBlock *B) { TrailingDbgRecords.erase(B); } }; } // end namespace llvm diff --git a/llvm/lib/MC/MCSectionXCOFF.cpp b/llvm/lib/MC/MCSectionXCOFF.cpp index 95d32e3..609ef09 100644 --- a/llvm/lib/MC/MCSectionXCOFF.cpp +++ b/llvm/lib/MC/MCSectionXCOFF.cpp @@ -87,8 +87,7 @@ void MCSectionXCOFF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T, if (getKind().isCommon() && !getKind().isBSSLocal()) return; - assert((getKind().isBSSExtern() || getKind().isBSSLocal()) && - "Unexepected section kind for toc-data"); + assert(getKind().isBSS() && "Unexpected section kind for toc-data"); printCsectDirective(OS); return; } diff --git a/llvm/lib/Object/Archive.cpp b/llvm/lib/Object/Archive.cpp index 9000e9a..6139d99 100644 --- a/llvm/lib/Object/Archive.cpp +++ b/llvm/lib/Object/Archive.cpp @@ -969,12 +969,19 @@ Archive::Archive(MemoryBufferRef Source, Error &Err) Err = Error::success(); } +object::Archive::Kind Archive::getDefaultKindForTriple(Triple &T) { + if (T.isOSDarwin()) + return object::Archive::K_DARWIN; + if (T.isOSAIX()) + return object::Archive::K_AIXBIG; + if (T.isOSWindows()) + return object::Archive::K_COFF; + return object::Archive::K_GNU; +} + object::Archive::Kind Archive::getDefaultKind() { Triple HostTriple(sys::getDefaultTargetTriple()); - return HostTriple.isOSDarwin() - ? object::Archive::K_DARWIN - : (HostTriple.isOSAIX() ? 
object::Archive::K_AIXBIG - : object::Archive::K_GNU); + return getDefaultKindForTriple(HostTriple); } Archive::child_iterator Archive::child_begin(Error &Err, diff --git a/llvm/lib/Object/ArchiveWriter.cpp b/llvm/lib/Object/ArchiveWriter.cpp index e062974..aa57e55 100644 --- a/llvm/lib/Object/ArchiveWriter.cpp +++ b/llvm/lib/Object/ArchiveWriter.cpp @@ -62,12 +62,16 @@ object::Archive::Kind NewArchiveMember::detectKindFromObject() const { Expected<std::unique_ptr<object::ObjectFile>> OptionalObject = object::ObjectFile::createObjectFile(MemBufferRef); - if (OptionalObject) - return isa<object::MachOObjectFile>(**OptionalObject) - ? object::Archive::K_DARWIN - : (isa<object::XCOFFObjectFile>(**OptionalObject) - ? object::Archive::K_AIXBIG - : object::Archive::K_GNU); + if (OptionalObject) { + if (isa<object::MachOObjectFile>(**OptionalObject)) + return object::Archive::K_DARWIN; + if (isa<object::XCOFFObjectFile>(**OptionalObject)) + return object::Archive::K_AIXBIG; + if (isa<object::COFFObjectFile>(**OptionalObject) || + isa<object::COFFImportFile>(**OptionalObject)) + return object::Archive::K_COFF; + return object::Archive::K_GNU; + } // Squelch the error in case we had a non-object file. consumeError(OptionalObject.takeError()); @@ -80,10 +84,7 @@ object::Archive::Kind NewArchiveMember::detectKindFromObject() const { MemBufferRef, file_magic::bitcode, &Context)) { auto &IRObject = cast<object::IRObjectFile>(**ObjOrErr); auto TargetTriple = Triple(IRObject.getTargetTriple()); - return TargetTriple.isOSDarwin() - ? object::Archive::K_DARWIN - : (TargetTriple.isOSAIX() ? object::Archive::K_AIXBIG - : object::Archive::K_GNU); + return object::Archive::getDefaultKindForTriple(TargetTriple); } else { // Squelch the error in case this was not a SymbolicFile. 
consumeError(ObjOrErr.takeError()); @@ -976,10 +977,12 @@ static Error writeArchiveToStream(raw_ostream &Out, SmallString<0> StringTableBuf; raw_svector_ostream StringTable(StringTableBuf); SymMap SymMap; + bool ShouldWriteSymtab = WriteSymtab != SymtabWritingMode::NoSymtab; // COFF symbol map uses 16-bit indexes, so we can't use it if there are too - // many members. - if (isCOFFArchive(Kind) && NewMembers.size() > 0xfffe) + // many members. COFF format also requires symbol table presence, so use + // GNU format when NoSymtab is requested. + if (isCOFFArchive(Kind) && (NewMembers.size() > 0xfffe || !ShouldWriteSymtab)) Kind = object::Archive::K_GNU; // In the scenario when LLVMContext is populated SymbolicFile will contain a @@ -1008,7 +1011,6 @@ static Error writeArchiveToStream(raw_ostream &Out, uint64_t LastMemberHeaderOffset = 0; uint64_t NumSyms = 0; uint64_t NumSyms32 = 0; // Store symbol number of 32-bit member files. - bool ShouldWriteSymtab = WriteSymtab != SymtabWritingMode::NoSymtab; for (const auto &M : Data) { // Record the start of the member's offset diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 054311d..9665ae5 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -7751,7 +7751,7 @@ void AArch64TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, // register allocator to pass call args in callee saved regs, without extra // copies to avoid these fake clobbers of actually-preserved GPRs. 
if (MI.getOpcode() == AArch64::MSRpstatesvcrImm1 || - MI.getOpcode() == AArch64::MSRpstatePseudo) + MI.getOpcode() == AArch64::MSRpstatePseudo) { for (unsigned I = MI.getNumOperands() - 1; I > 0; --I) if (MachineOperand &MO = MI.getOperand(I); MO.isReg() && MO.isImplicit() && MO.isDef() && @@ -7759,6 +7759,16 @@ void AArch64TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, AArch64::GPR64RegClass.contains(MO.getReg()))) MI.removeOperand(I); + // The SVE vector length can change when entering/leaving streaming mode. + if (MI.getOperand(0).getImm() == AArch64SVCR::SVCRSM || + MI.getOperand(0).getImm() == AArch64SVCR::SVCRSMZA) { + MI.addOperand(MachineOperand::CreateReg(AArch64::VG, /*IsDef=*/false, + /*IsImplicit=*/true)); + MI.addOperand(MachineOperand::CreateReg(AArch64::VG, /*IsDef=*/true, + /*IsImplicit=*/true)); + } + } + // Add an implicit use of 'VG' for ADDXri/SUBXri, which are instructions that // have nothing to do with VG, were it not that they are used to materialise a // frame-address. If they contain a frame-index to a scalable vector, this @@ -21413,12 +21423,8 @@ static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG, } } - // uzp1(xtn x, xtn y) -> xtn(uzp1 (x, y)) - // Only implemented on little-endian subtargets. - bool IsLittleEndian = DAG.getDataLayout().isLittleEndian(); - - // This optimization only works on little endian. - if (!IsLittleEndian) + // These optimizations only work on little endian. 
+ if (!DAG.getDataLayout().isLittleEndian()) return SDValue(); // uzp1(bitcast(x), bitcast(y)) -> uzp1(x, y) @@ -21437,21 +21443,28 @@ static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG, if (ResVT != MVT::v2i32 && ResVT != MVT::v4i16 && ResVT != MVT::v8i8) return SDValue(); - auto getSourceOp = [](SDValue Operand) -> SDValue { - const unsigned Opcode = Operand.getOpcode(); - if (Opcode == ISD::TRUNCATE) - return Operand->getOperand(0); - if (Opcode == ISD::BITCAST && - Operand->getOperand(0).getOpcode() == ISD::TRUNCATE) - return Operand->getOperand(0)->getOperand(0); - return SDValue(); - }; + SDValue SourceOp0 = peekThroughBitcasts(Op0); + SDValue SourceOp1 = peekThroughBitcasts(Op1); - SDValue SourceOp0 = getSourceOp(Op0); - SDValue SourceOp1 = getSourceOp(Op1); + // truncating uzp1(x, y) -> xtn(concat (x, y)) + if (SourceOp0.getValueType() == SourceOp1.getValueType()) { + EVT Op0Ty = SourceOp0.getValueType(); + if ((ResVT == MVT::v4i16 && Op0Ty == MVT::v2i32) || + (ResVT == MVT::v8i8 && Op0Ty == MVT::v4i16)) { + SDValue Concat = + DAG.getNode(ISD::CONCAT_VECTORS, DL, + Op0Ty.getDoubleNumVectorElementsVT(*DAG.getContext()), + SourceOp0, SourceOp1); + return DAG.getNode(ISD::TRUNCATE, DL, ResVT, Concat); + } + } - if (!SourceOp0 || !SourceOp1) + // uzp1(xtn x, xtn y) -> xtn(uzp1 (x, y)) + if (SourceOp0.getOpcode() != ISD::TRUNCATE || + SourceOp1.getOpcode() != ISD::TRUNCATE) return SDValue(); + SourceOp0 = SourceOp0.getOperand(0); + SourceOp1 = SourceOp1.getOperand(0); if (SourceOp0.getValueType() != SourceOp1.getValueType() || !SourceOp0.getValueType().isSimple()) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 6254e68..b4b975c 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -6153,26 +6153,39 @@ defm UZP2 : SIMDZipVector<0b101, "uzp2", AArch64uzp2>; defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>; defm ZIP2 : 
SIMDZipVector<0b111, "zip2", AArch64zip2>; -def : Pat<(v16i8 (concat_vectors (v8i8 (trunc (v8i16 V128:$Vn))), - (v8i8 (trunc (v8i16 V128:$Vm))))), - (UZP1v16i8 V128:$Vn, V128:$Vm)>; -def : Pat<(v8i16 (concat_vectors (v4i16 (trunc (v4i32 V128:$Vn))), - (v4i16 (trunc (v4i32 V128:$Vm))))), - (UZP1v8i16 V128:$Vn, V128:$Vm)>; -def : Pat<(v4i32 (concat_vectors (v2i32 (trunc (v2i64 V128:$Vn))), - (v2i32 (trunc (v2i64 V128:$Vm))))), - (UZP1v4i32 V128:$Vn, V128:$Vm)>; -// These are the same as above, with an optional assertzext node that can be -// generated from fptoi lowering. -def : Pat<(v16i8 (concat_vectors (v8i8 (assertzext (trunc (v8i16 V128:$Vn)))), - (v8i8 (assertzext (trunc (v8i16 V128:$Vm)))))), - (UZP1v16i8 V128:$Vn, V128:$Vm)>; -def : Pat<(v8i16 (concat_vectors (v4i16 (assertzext (trunc (v4i32 V128:$Vn)))), - (v4i16 (assertzext (trunc (v4i32 V128:$Vm)))))), - (UZP1v8i16 V128:$Vn, V128:$Vm)>; -def : Pat<(v4i32 (concat_vectors (v2i32 (assertzext (trunc (v2i64 V128:$Vn)))), - (v2i32 (assertzext (trunc (v2i64 V128:$Vm)))))), - (UZP1v4i32 V128:$Vn, V128:$Vm)>; +def trunc_optional_assert_ext : PatFrags<(ops node:$op0), + [(trunc node:$op0), + (assertzext (trunc node:$op0)), + (assertsext (trunc node:$op0))]>; + +// concat_vectors(trunc(x), trunc(y)) -> uzp1(x, y) +// concat_vectors(assertzext(trunc(x)), assertzext(trunc(y))) -> uzp1(x, y) +// concat_vectors(assertsext(trunc(x)), assertsext(trunc(y))) -> uzp1(x, y) +class concat_trunc_to_uzp1_pat<ValueType SrcTy, ValueType TruncTy, ValueType ConcatTy> + : Pat<(ConcatTy (concat_vectors (TruncTy (trunc_optional_assert_ext (SrcTy V128:$Vn))), + (TruncTy (trunc_optional_assert_ext (SrcTy V128:$Vm))))), + (!cast<Instruction>("UZP1"#ConcatTy) V128:$Vn, V128:$Vm)>; +def : concat_trunc_to_uzp1_pat<v8i16, v8i8, v16i8>; +def : concat_trunc_to_uzp1_pat<v4i32, v4i16, v8i16>; +def : concat_trunc_to_uzp1_pat<v2i64, v2i32, v4i32>; + +// trunc(concat_vectors(trunc(x), trunc(y))) -> xtn(uzp1(x, y)) +// 
trunc(concat_vectors(assertzext(trunc(x)), assertzext(trunc(y)))) -> xtn(uzp1(x, y)) +// trunc(concat_vectors(assertsext(trunc(x)), assertsext(trunc(y)))) -> xtn(uzp1(x, y)) +class trunc_concat_trunc_to_xtn_uzp1_pat<ValueType SrcTy, ValueType TruncTy, ValueType ConcatTy, + ValueType Ty> + : Pat<(Ty (trunc_optional_assert_ext + (ConcatTy (concat_vectors + (TruncTy (trunc_optional_assert_ext (SrcTy V128:$Vn))), + (TruncTy (trunc_optional_assert_ext (SrcTy V128:$Vm))))))), + (!cast<Instruction>("XTN"#Ty) (!cast<Instruction>("UZP1"#ConcatTy) V128:$Vn, V128:$Vm))>; +def : trunc_concat_trunc_to_xtn_uzp1_pat<v4i32, v4i16, v8i16, v8i8>; +def : trunc_concat_trunc_to_xtn_uzp1_pat<v2i64, v2i32, v4i32, v4i16>; + +def : Pat<(v8i8 (trunc (concat_vectors (v4i16 V64:$Vn), (v4i16 V64:$Vm)))), + (UZP1v8i8 V64:$Vn, V64:$Vm)>; +def : Pat<(v4i16 (trunc (concat_vectors (v2i32 V64:$Vn), (v2i32 V64:$Vm)))), + (UZP1v4i16 V64:$Vn, V64:$Vm)>; def : Pat<(v16i8 (concat_vectors (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vn), (i32 8)))), diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 33cb5f9..44d9a8a 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -223,8 +223,6 @@ def MSRpstatesvcrImm1 let Inst{8} = imm; let Inst{7-5} = 0b011; // op2 let hasPostISelHook = 1; - let Uses = [VG]; - let Defs = [VG]; } def : InstAlias<"smstart", (MSRpstatesvcrImm1 0b011, 0b1)>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index 5ed82c0..86f77f7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -194,7 +194,25 @@ class HasOneUseTernaryOp<SDPatternOperator op> : PatFrag< }]; } -class is_canonicalized<SDPatternOperator op> : PatFrag< +class is_canonicalized_1<SDPatternOperator op> : PatFrag< + (ops node:$src0), + (op $src0), + [{ + const SITargetLowering &Lowering = + *static_cast<const 
SITargetLowering *>(getTargetLowering()); + + return Lowering.isCanonicalized(*CurDAG, N->getOperand(0)); + }]> { + + let GISelPredicateCode = [{ + const SITargetLowering *TLI = static_cast<const SITargetLowering *>( + MF.getSubtarget().getTargetLowering()); + + return TLI->isCanonicalized(MI.getOperand(1).getReg(), MF); + }]; +} + +class is_canonicalized_2<SDPatternOperator op> : PatFrag< (ops node:$src0, node:$src1), (op $src0, $src1), [{ @@ -210,8 +228,8 @@ class is_canonicalized<SDPatternOperator op> : PatFrag< const SITargetLowering *TLI = static_cast<const SITargetLowering *>( MF.getSubtarget().getTargetLowering()); - return TLI->isCanonicalized(MI.getOperand(1).getReg(), const_cast<MachineFunction&>(MF)) && - TLI->isCanonicalized(MI.getOperand(2).getReg(), const_cast<MachineFunction&>(MF)); + return TLI->isCanonicalized(MI.getOperand(1).getReg(), MF) && + TLI->isCanonicalized(MI.getOperand(2).getReg(), MF); }]; } diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index c709102..4ae514f 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -2616,7 +2616,6 @@ defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Real_Atomic_gfx11_gfx12<0x042, "buffer defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Real_Atomic_gfx11<0x050, "buffer_atomic_cmpswap_f32">; defm BUFFER_ATOMIC_COND_SUB_U32 : MUBUF_Real_Atomic_gfx12<0x050>; defm BUFFER_ATOMIC_CSUB : MUBUF_Real_Atomic_gfx11_gfx12<0x037, "buffer_atomic_sub_clamp_u32", "buffer_atomic_csub_u32">; -def : Mnem_gfx11_gfx12<"buffer_atomic_csub", "buffer_atomic_csub_u32">; defm BUFFER_ATOMIC_DEC : MUBUF_Real_Atomic_gfx11_gfx12<0x040, "buffer_atomic_dec_u32">; defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Real_Atomic_gfx11_gfx12<0x04D, "buffer_atomic_dec_u64">; defm BUFFER_ATOMIC_INC : MUBUF_Real_Atomic_gfx11_gfx12<0x03F, "buffer_atomic_inc_u32">; diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td index 87ace01..e944dde 
100644 --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -1735,14 +1735,12 @@ def DS_WRITE_B128_vi : DS_Real_vi<0xdf, DS_WRITE_B128>; def DS_READ_B96_vi : DS_Real_vi<0xfe, DS_READ_B96>; def DS_READ_B128_vi : DS_Real_vi<0xff, DS_READ_B128>; -let SubtargetPredicate = isGFX90APlus in { - def DS_ADD_F64_vi : DS_Real_vi<0x5c, DS_ADD_F64>; - def DS_ADD_RTN_F64_vi : DS_Real_vi<0x7c, DS_ADD_RTN_F64>; -} // End SubtargetPredicate = isGFX90APlus - -let SubtargetPredicate = isGFX940Plus in { - def DS_PK_ADD_F16_vi : DS_Real_vi<0x17, DS_PK_ADD_F16>; - def DS_PK_ADD_RTN_F16_vi : DS_Real_vi<0xb7, DS_PK_ADD_RTN_F16>; - def DS_PK_ADD_BF16_vi : DS_Real_vi<0x18, DS_PK_ADD_BF16>; - def DS_PK_ADD_RTN_BF16_vi : DS_Real_vi<0xb8, DS_PK_ADD_RTN_BF16>; -} // End SubtargetPredicate = isGFX940Plus +// GFX90A+. +def DS_ADD_F64_vi : DS_Real_vi<0x5c, DS_ADD_F64>; +def DS_ADD_RTN_F64_vi : DS_Real_vi<0x7c, DS_ADD_RTN_F64>; + +// GFX940+. +def DS_PK_ADD_F16_vi : DS_Real_vi<0x17, DS_PK_ADD_F16>; +def DS_PK_ADD_RTN_F16_vi : DS_Real_vi<0xb7, DS_PK_ADD_RTN_F16>; +def DS_PK_ADD_BF16_vi : DS_Real_vi<0x18, DS_PK_ADD_BF16>; +def DS_PK_ADD_RTN_BF16_vi : DS_Real_vi<0xb8, DS_PK_ADD_RTN_BF16>; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 9bc1b8e..5ccf21f 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -12572,6 +12572,10 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op, case ISD::FREM: case ISD::FP_ROUND: case ISD::FP_EXTEND: + case ISD::FP16_TO_FP: + case ISD::FP_TO_FP16: + case ISD::BF16_TO_FP: + case ISD::FP_TO_BF16: case ISD::FLDEXP: case AMDGPUISD::FMUL_LEGACY: case AMDGPUISD::FMAD_FTZ: @@ -12591,6 +12595,9 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op, case AMDGPUISD::CVT_F32_UBYTE1: case AMDGPUISD::CVT_F32_UBYTE2: case AMDGPUISD::CVT_F32_UBYTE3: + case AMDGPUISD::FP_TO_FP16: + case 
AMDGPUISD::SIN_HW: + case AMDGPUISD::COS_HW: return true; // It can/will be lowered or combined as a bit operation. @@ -12600,6 +12607,20 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op, case ISD::FCOPYSIGN: return isCanonicalized(DAG, Op.getOperand(0), MaxDepth - 1); + case ISD::AND: + if (Op.getValueType() == MVT::i32) { + // Be careful as we only know it is a bitcast floating point type. It + // could be f32, v2f16, we have no way of knowing. Luckily the constant + // value that we optimize for, which comes up in fp32 to bf16 conversions, + // is valid to optimize for all types. + if (auto *RHS = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + if (RHS->getZExtValue() == 0xffff0000) { + return isCanonicalized(DAG, Op.getOperand(0), MaxDepth - 1); + } + } + } + break; + case ISD::FSIN: case ISD::FCOS: case ISD::FSINCOS: @@ -12665,6 +12686,9 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op, return false; case ISD::BITCAST: + // TODO: This is incorrect as it loses track of the operand's type. We may + // end up effectively bitcasting from f32 to v2f16 or vice versa, and the + // same bits that are canonicalized in one type need not be in the other. 
return isCanonicalized(DAG, Op.getOperand(0), MaxDepth - 1); case ISD::TRUNCATE: { // Hack round the mess we make when legalizing extract_vector_elt @@ -12694,25 +12718,26 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op, case Intrinsic::amdgcn_trig_preop: case Intrinsic::amdgcn_log: case Intrinsic::amdgcn_exp2: + case Intrinsic::amdgcn_sqrt: return true; default: break; } - [[fallthrough]]; + break; } default: - // FIXME: denormalsEnabledForType is broken for dynamic - return denormalsEnabledForType(DAG, Op.getValueType()) && - DAG.isKnownNeverSNaN(Op); + break; } - llvm_unreachable("invalid operation"); + // FIXME: denormalsEnabledForType is broken for dynamic + return denormalsEnabledForType(DAG, Op.getValueType()) && + DAG.isKnownNeverSNaN(Op); } -bool SITargetLowering::isCanonicalized(Register Reg, MachineFunction &MF, +bool SITargetLowering::isCanonicalized(Register Reg, const MachineFunction &MF, unsigned MaxDepth) const { - MachineRegisterInfo &MRI = MF.getRegInfo(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); MachineInstr *MI = MRI.getVRegDef(Reg); unsigned Opcode = MI->getOpcode(); @@ -12931,27 +12956,7 @@ SDValue SITargetLowering::performFCanonicalizeCombine( } } - unsigned SrcOpc = N0.getOpcode(); - - // If it's free to do so, push canonicalizes further up the source, which may - // find a canonical source. - // - // TODO: More opcodes. Note this is unsafe for the _ieee minnum/maxnum for - // sNaNs. - if (SrcOpc == ISD::FMINNUM || SrcOpc == ISD::FMAXNUM) { - auto *CRHS = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); - if (CRHS && N0.hasOneUse()) { - SDLoc SL(N); - SDValue Canon0 = DAG.getNode(ISD::FCANONICALIZE, SL, VT, - N0.getOperand(0)); - SDValue Canon1 = getCanonicalConstantFP(DAG, SL, VT, CRHS->getValueAPF()); - DCI.AddToWorklist(Canon0.getNode()); - - return DAG.getNode(N0.getOpcode(), SL, VT, Canon0, Canon1); - } - } - - return isCanonicalized(DAG, N0) ? 
N0 : SDValue(); + return SDValue(); } static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) { @@ -15939,8 +15944,8 @@ bool SITargetLowering::denormalsEnabledForType(const SelectionDAG &DAG, } } -bool SITargetLowering::denormalsEnabledForType(LLT Ty, - MachineFunction &MF) const { +bool SITargetLowering::denormalsEnabledForType( + LLT Ty, const MachineFunction &MF) const { switch (Ty.getScalarSizeInBits()) { case 32: return !denormalModeIsFlushAllF32(MF); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index a20442e..89da442 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -523,10 +523,10 @@ public: bool isCanonicalized(SelectionDAG &DAG, SDValue Op, unsigned MaxDepth = 5) const; - bool isCanonicalized(Register Reg, MachineFunction &MF, + bool isCanonicalized(Register Reg, const MachineFunction &MF, unsigned MaxDepth = 5) const; bool denormalsEnabledForType(const SelectionDAG &DAG, EVT VT) const; - bool denormalsEnabledForType(LLT Ty, MachineFunction &MF) const; + bool denormalsEnabledForType(LLT Ty, const MachineFunction &MF) const; bool checkForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, const TargetRegisterInfo *TRI, diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 33c93cd..3ab7884 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -2944,6 +2944,34 @@ def : GCNPat< (V_BFREV_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$a, sub1))), sub0, (V_BFREV_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$a, sub0))), sub1)>; +// If fcanonicalize's operand is implicitly canonicalized, we only need a copy. 
+let AddedComplexity = 1000 in { +def : GCNPat< + (is_canonicalized_1<fcanonicalize> f16:$src), + (COPY f16:$src) +>; + +def : GCNPat< + (is_canonicalized_1<fcanonicalize> v2f16:$src), + (COPY v2f16:$src) +>; + +def : GCNPat< + (is_canonicalized_1<fcanonicalize> f32:$src), + (COPY f32:$src) +>; + +def : GCNPat< + (is_canonicalized_1<fcanonicalize> v2f32:$src), + (COPY v2f32:$src) +>; + +def : GCNPat< + (is_canonicalized_1<fcanonicalize> f64:$src), + (COPY f64:$src) +>; +} + // Prefer selecting to max when legal, but using mul is always valid. let AddedComplexity = -5 in { @@ -3277,8 +3305,8 @@ def : GCNPat < let AddedComplexity = 5 in { def : GCNPat < - (v2f16 (is_canonicalized<build_vector> (f16 (VOP3Mods (f16 VGPR_32:$src0), i32:$src0_mods)), - (f16 (VOP3Mods (f16 VGPR_32:$src1), i32:$src1_mods)))), + (v2f16 (is_canonicalized_2<build_vector> (f16 (VOP3Mods (f16 VGPR_32:$src0), i32:$src0_mods)), + (f16 (VOP3Mods (f16 VGPR_32:$src1), i32:$src1_mods)))), (V_PACK_B32_F16_e64 $src0_mods, VGPR_32:$src0, $src1_mods, VGPR_32:$src1) >; } @@ -3590,6 +3618,17 @@ FPMinMaxPat<Instruction minmaxInst, ValueType vt, SDPatternOperator min_or_max, DSTCLAMP.NONE, DSTOMOD.NONE) >; +class +FPMinCanonMaxPat<Instruction minmaxInst, ValueType vt, SDPatternOperator min_or_max, + SDPatternOperator max_or_min_oneuse> : GCNPat < + (min_or_max (is_canonicalized_1<fcanonicalize> + (max_or_min_oneuse (VOP3Mods vt:$src0, i32:$src0_mods), + (VOP3Mods vt:$src1, i32:$src1_mods))), + (vt (VOP3Mods vt:$src2, i32:$src2_mods))), + (minmaxInst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2, + DSTCLAMP.NONE, DSTOMOD.NONE) +>; + let OtherPredicates = [isGFX11Plus] in { def : IntMinMaxPat<V_MAXMIN_I32_e64, smin, smax_oneuse>; def : IntMinMaxPat<V_MINMAX_I32_e64, smax, smin_oneuse>; @@ -3599,6 +3638,10 @@ def : FPMinMaxPat<V_MINMAX_F32_e64, f32, fmaxnum_like, fminnum_like_oneuse>; def : FPMinMaxPat<V_MAXMIN_F32_e64, f32, fminnum_like, fmaxnum_like_oneuse>; def : FPMinMaxPat<V_MINMAX_F16_e64, f16, 
fmaxnum_like, fminnum_like_oneuse>; def : FPMinMaxPat<V_MAXMIN_F16_e64, f16, fminnum_like, fmaxnum_like_oneuse>; +def : FPMinCanonMaxPat<V_MINMAX_F32_e64, f32, fmaxnum_like, fminnum_like_oneuse>; +def : FPMinCanonMaxPat<V_MAXMIN_F32_e64, f32, fminnum_like, fmaxnum_like_oneuse>; +def : FPMinCanonMaxPat<V_MINMAX_F16_e64, f16, fmaxnum_like, fminnum_like_oneuse>; +def : FPMinCanonMaxPat<V_MAXMIN_F16_e64, f16, fminnum_like, fmaxnum_like_oneuse>; } let OtherPredicates = [isGFX9Plus] in { @@ -3612,6 +3655,10 @@ def : FPMinMaxPat<V_MINIMUMMAXIMUM_F32_e64, f32, DivergentBinFrag<fmaximum>, fmi def : FPMinMaxPat<V_MAXIMUMMINIMUM_F32_e64, f32, DivergentBinFrag<fminimum>, fmaximum_oneuse>; def : FPMinMaxPat<V_MINIMUMMAXIMUM_F16_e64, f16, DivergentBinFrag<fmaximum>, fminimum_oneuse>; def : FPMinMaxPat<V_MAXIMUMMINIMUM_F16_e64, f16, DivergentBinFrag<fminimum>, fmaximum_oneuse>; +def : FPMinCanonMaxPat<V_MINIMUMMAXIMUM_F32_e64, f32, DivergentBinFrag<fmaximum>, fminimum_oneuse>; +def : FPMinCanonMaxPat<V_MAXIMUMMINIMUM_F32_e64, f32, DivergentBinFrag<fminimum>, fmaximum_oneuse>; +def : FPMinCanonMaxPat<V_MINIMUMMAXIMUM_F16_e64, f16, DivergentBinFrag<fmaximum>, fminimum_oneuse>; +def : FPMinCanonMaxPat<V_MAXIMUMMINIMUM_F16_e64, f16, DivergentBinFrag<fminimum>, fmaximum_oneuse>; } // Convert a floating-point power of 2 to the integer exponent. 
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp b/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp index 3fd7a1a..98cd3a8 100644 --- a/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp +++ b/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp @@ -695,8 +695,8 @@ bool PPCInstructionSelector::selectConstantPool( .addReg(HaAddrReg) .addMemOperand(MMO); else - // For medium code model, generate ADDItocL(CPI, ADDIStocHA8(X2, CPI)) - MI = BuildMI(MBB, I, DbgLoc, TII.get(PPC::ADDItocL), DstReg) + // For medium code model, generate ADDItocL8(CPI, ADDIStocHA8(X2, CPI)) + MI = BuildMI(MBB, I, DbgLoc, TII.get(PPC::ADDItocL8), DstReg) .addReg(HaAddrReg) .addConstantPoolIndex(CPI); } diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td index 3bbc5a6..5015ba8 100644 --- a/llvm/lib/Target/PowerPC/P10InstrResources.td +++ b/llvm/lib/Target/PowerPC/P10InstrResources.td @@ -881,7 +881,7 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY], // 3 Cycles ALU operations, 1 input operands def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read], (instrs - ADDI, ADDI8, ADDIdtprelL32, ADDItlsldLADDR32, ADDItocL, LI, LI8, + ADDI, ADDI8, ADDIdtprelL32, ADDItlsldLADDR32, ADDItocL8, LI, LI8, ADDIC, ADDIC8, ADDIS, ADDIS8, ADDISdtprelHA32, ADDIStocHA, ADDIStocHA8, LIS, LIS8, ADDME, ADDME8, diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 9396ca2..542854e 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -1236,8 +1236,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) { EmitToStreamer(*OutStreamer, TmpInst); return; } - case PPC::ADDItocL: { - // Transform %xd = ADDItocL %xs, @sym + case PPC::ADDItocL8: { + // Transform %xd = ADDItocL8 %xs, @sym LowerPPCMachineInstrToMCInst(MI, TmpInst, *this); // Change the opcode to ADDI8. 
If the global address is external, then @@ -1246,7 +1246,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) { TmpInst.setOpcode(PPC::ADDI8); const MachineOperand &MO = MI->getOperand(2); - assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL."); + assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL8."); LLVM_DEBUG(assert( !(MO.isGlobal() && Subtarget->isGVIndirectSymbol(MO.getGlobal())) && @@ -2659,6 +2659,8 @@ void PPCAIXAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) { // If the Global Variable has the toc-data attribute, it needs to be emitted // when we emit the .toc section. if (GV->hasAttribute("toc-data")) { + unsigned PointerSize = GV->getParent()->getDataLayout().getPointerSize(); + Subtarget->tocDataChecks(PointerSize, GV); TOCDataGlobalVars.push_back(GV); return; } diff --git a/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def b/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def index 8bbe315..6bb66bc 100644 --- a/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def +++ b/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def @@ -29,7 +29,7 @@ FUSION_FEATURE(GeneralBack2Back, hasBack2BackFusion, -1, ADDIStocHA8, ADDIdtprelL32, ADDItlsldLADDR32, - ADDItocL, + ADDItocL8, ADDME, ADDME8, ADDME8O, @@ -518,7 +518,7 @@ FUSION_FEATURE(GeneralBack2Back, hasBack2BackFusion, -1, ADDIStocHA8, ADDIdtprelL32, ADDItlsldLADDR32, - ADDItocL, + ADDItocL8, ADDME, ADDME8, ADDME8O, diff --git a/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/llvm/lib/Target/PowerPC/PPCFastISel.cpp index 56af80f..6e31cda 100644 --- a/llvm/lib/Target/PowerPC/PPCFastISel.cpp +++ b/llvm/lib/Target/PowerPC/PPCFastISel.cpp @@ -2094,7 +2094,7 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) { // for large code model, we generate: // LDtocL(GV, ADDIStocHA8(%x2, GV)) // Otherwise we generate: - // ADDItocL(ADDIStocHA8(%x2, GV), GV) + // ADDItocL8(ADDIStocHA8(%x2, GV), GV) // Either way, start with the ADDIStocHA8: Register HighPartReg = 
createResultReg(RC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDIStocHA8), @@ -2104,9 +2104,11 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::LDtocL), DestReg).addGlobalAddress(GV).addReg(HighPartReg); } else { - // Otherwise generate the ADDItocL. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDItocL), - DestReg).addReg(HighPartReg).addGlobalAddress(GV); + // Otherwise generate the ADDItocL8. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDItocL8), + DestReg) + .addReg(HighPartReg) + .addGlobalAddress(GV); } } diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 9e5f0b3..0c25accd 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -521,40 +521,6 @@ static bool hasTocDataAttr(SDValue Val, unsigned PointerSize) { if (!GV->hasAttribute("toc-data")) return false; - - // TODO: These asserts should be updated as more support for the toc data - // transformation is added (struct support, etc.). 
- - assert( - PointerSize >= GV->getAlign().valueOrOne().value() && - "GlobalVariables with an alignment requirement stricter than TOC entry " - "size not supported by the toc data transformation."); - - Type *GVType = GV->getValueType(); - - assert(GVType->isSized() && "A GlobalVariable's size must be known to be " - "supported by the toc data transformation."); - - if (GVType->isVectorTy()) - report_fatal_error("A GlobalVariable of Vector type is not currently " - "supported by the toc data transformation."); - - if (GVType->isArrayTy()) - report_fatal_error("A GlobalVariable of Array type is not currently " - "supported by the toc data transformation."); - - if (GVType->isStructTy()) - report_fatal_error("A GlobalVariable of Struct type is not currently " - "supported by the toc data transformation."); - - assert(GVType->getPrimitiveSizeInBits() <= PointerSize * 8 && - "A GlobalVariable with size larger than a TOC entry is not currently " - "supported by the toc data transformation."); - - if (GV->hasPrivateLinkage()) - report_fatal_error("A GlobalVariable with private linkage is not " - "currently supported by the toc data transformation."); - return true; } @@ -6168,7 +6134,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { // [64-bit ELF/AIX] // LDtocL(@sym, ADDIStocHA8(%x2, @sym)) // Otherwise we generate: - // ADDItocL(ADDIStocHA8(%x2, @sym), @sym) + // ADDItocL8(ADDIStocHA8(%x2, @sym), @sym) SDValue GA = N->getOperand(0); SDValue TOCbase = N->getOperand(1); @@ -6188,7 +6154,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { } // Build the address relative to the TOC-pointer. - ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64, + ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL8, dl, MVT::i64, SDValue(Tmp, 0), GA)); return; } @@ -7741,7 +7707,7 @@ void PPCDAGToDAGISel::PeepholePPC64() { // target flags on the immediate operand when we fold it into the // load instruction. 
// - // For something like ADDItocL, the relocation information is + // For something like ADDItocL8, the relocation information is // inferred from the opcode; when we process it in the AsmPrinter, // we add the necessary relocation there. A load, though, can receive // relocation from various flavors of ADDIxxx, so we need to carry @@ -7762,7 +7728,7 @@ void PPCDAGToDAGISel::PeepholePPC64() { case PPC::ADDItlsldL: Flags = PPCII::MO_TLSLD_LO; break; - case PPC::ADDItocL: + case PPC::ADDItocL8: Flags = PPCII::MO_TOC_LO; break; } @@ -7789,7 +7755,7 @@ void PPCDAGToDAGISel::PeepholePPC64() { // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only // one use, then we can do this for any offset, we just need to also // update the offset (i.e. the symbol addend) on the addis also. - if (Base.getMachineOpcode() != PPC::ADDItocL) + if (Base.getMachineOpcode() != PPC::ADDItocL8) continue; if (!HBase.isMachineOpcode() || diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td index 2949d58..a935979 100644 --- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -1480,8 +1480,8 @@ let hasSideEffects = 0 in { let isReMaterializable = 1 in { def ADDIStocHA8: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp), "#ADDIStocHA8", []>, isPPC64; -def ADDItocL: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp), - "#ADDItocL", []>, isPPC64; +def ADDItocL8: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp), + "#ADDItocL8", []>, isPPC64; } // Local Data Transform diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 5d37e92..5f5eb31 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -1077,7 +1077,7 @@ bool PPCInstrInfo::isReallyTriviallyReMaterializable( case PPC::LIS8: case PPC::ADDIStocHA: case PPC::ADDIStocHA8: - case 
PPC::ADDItocL: + case PPC::ADDItocL8: case PPC::LOAD_STACK_GUARD: case PPC::PPCLdFixedAddr: case PPC::XXLXORz: @@ -3453,7 +3453,7 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI( break; case PPC::LI: case PPC::LI8: - case PPC::ADDItocL: + case PPC::ADDItocL8: case PPC::ADDI: case PPC::ADDI8: OpNoForForwarding = i; @@ -4420,7 +4420,7 @@ bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI, MachineOperand *&ImmMO, MachineOperand *&RegMO) const { unsigned Opc = DefMI.getOpcode(); - if (Opc != PPC::ADDItocL && Opc != PPC::ADDI && Opc != PPC::ADDI8) + if (Opc != PPC::ADDItocL8 && Opc != PPC::ADDI && Opc != PPC::ADDI8) return false; assert(DefMI.getNumOperands() >= 3 && @@ -4485,8 +4485,8 @@ bool PPCInstrInfo::isImmElgibleForForwarding(const MachineOperand &ImmMO, int64_t &Imm, int64_t BaseImm) const { assert(isAnImmediateOperand(ImmMO) && "ImmMO is NOT an immediate"); - if (DefMI.getOpcode() == PPC::ADDItocL) { - // The operand for ADDItocL is CPI, which isn't imm at compiling time, + if (DefMI.getOpcode() == PPC::ADDItocL8) { + // The operand for ADDItocL8 is CPI, which isn't imm at compiling time, // However, we know that, it is 16-bit width, and has the alignment of 4. // Check if the instruction met the requirement. if (III.ImmMustBeMultipleOf > 4 || @@ -4899,7 +4899,7 @@ bool PPCInstrInfo::transformToImmFormFedByAdd( // register with ImmMO. // Before that, we need to fixup the target flags for imm. // For some reason, we miss to set the flag for the ImmMO if it is CPI. 
- if (DefMI.getOpcode() == PPC::ADDItocL) + if (DefMI.getOpcode() == PPC::ADDItocL8) ImmMO->setTargetFlags(PPCII::MO_TOC_LO); // MI didn't have the interface such as MI.setOperand(i) though diff --git a/llvm/lib/Target/PowerPC/PPCMacroFusion.def b/llvm/lib/Target/PowerPC/PPCMacroFusion.def index 6b8ad22..fb6e656 100644 --- a/llvm/lib/Target/PowerPC/PPCMacroFusion.def +++ b/llvm/lib/Target/PowerPC/PPCMacroFusion.def @@ -32,7 +32,7 @@ // {addi} followed by one of these {lxvd2x, lxvw4x, lxvdsx, lvebx, lvehx, // lvewx, lvx, lxsdx} FUSION_FEATURE(AddiLoad, hasAddiLoadFusion, 2, \ - FUSION_OP_SET(ADDI, ADDI8, ADDItocL), \ + FUSION_OP_SET(ADDI, ADDI8, ADDItocL8), \ FUSION_OP_SET(LXVD2X, LXVW4X, LXVDSX, LVEBX, LVEHX, LVEWX, \ LVX, LXSDX)) @@ -135,11 +135,11 @@ FUSION_FEATURE(XorisXori, hasWideImmFusion, 1, FUSION_OP_SET(XORIS, XORIS8), // addis rx,ra,si - addi rt,rx,SI, SI >= 0 FUSION_FEATURE(AddisAddi, hasWideImmFusion, 1, FUSION_OP_SET(ADDIS, ADDIS8, ADDIStocHA8), - FUSION_OP_SET(ADDI, ADDI8, ADDItocL)) + FUSION_OP_SET(ADDI, ADDI8, ADDItocL8)) // addi rx,ra,si - addis rt,rx,SI, ra > 0, SI >= 2 FUSION_FEATURE(AddiAddis, hasWideImmFusion, 1, - FUSION_OP_SET(ADDI, ADDI8, ADDItocL), + FUSION_OP_SET(ADDI, ADDI8, ADDItocL8), FUSION_OP_SET(ADDIS, ADDIS8, ADDIStocHA8)) // mtctr - { bcctr,bcctrl } diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp index 5380ec1..884f2f5 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -185,6 +185,28 @@ bool PPCSubtarget::enableSubRegLiveness() const { return UseSubRegLiveness; } +void PPCSubtarget::tocDataChecks(unsigned PointerSize, + const GlobalVariable *GV) const { + // TODO: These asserts should be updated as more support for the toc data + // transformation is added (struct support, etc.). 
+ assert( + PointerSize >= GV->getAlign().valueOrOne().value() && + "GlobalVariables with an alignment requirement stricter than TOC entry " + "size not supported by the toc data transformation."); + + Type *GVType = GV->getValueType(); + assert(GVType->isSized() && "A GlobalVariable's size must be known to be " + "supported by the toc data transformation."); + if (GV->getParent()->getDataLayout().getTypeSizeInBits(GVType) > + PointerSize * 8) + report_fatal_error( + "A GlobalVariable with size larger than a TOC entry is not currently " + "supported by the toc data transformation."); + if (GV->hasPrivateLinkage()) + report_fatal_error("A GlobalVariable with private linkage is not " + "currently supported by the toc data transformation."); +} + bool PPCSubtarget::isGVIndirectSymbol(const GlobalValue *GV) const { // Large code model always uses the TOC even for local symbols. if (TM.getCodeModel() == CodeModel::Large) diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h index 306a52d..d913f22 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -245,6 +245,8 @@ public: /// True if the GV will be accessed via an indirect symbol. bool isGVIndirectSymbol(const GlobalValue *GV) const; + void tocDataChecks(unsigned PointerSize, const GlobalVariable *GV) const; + /// True if the ABI is descriptor based. bool usesFunctionDescriptors() const { // Both 32-bit and 64-bit AIX are descriptor based. 
For ELF only the 64-bit diff --git a/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp b/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp index 81f078a..0527991 100644 --- a/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp +++ b/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp @@ -94,8 +94,7 @@ namespace { protected: bool hasTOCLoReloc(const MachineInstr &MI) { - if (MI.getOpcode() == PPC::LDtocL || - MI.getOpcode() == PPC::ADDItocL || + if (MI.getOpcode() == PPC::LDtocL || MI.getOpcode() == PPC::ADDItocL8 || MI.getOpcode() == PPC::LWZtocL) return true; diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index d83979a..2da75bd 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -2716,7 +2716,7 @@ ParseStatus RISCVAsmParser::parseDirective(AsmToken DirectiveID) { bool RISCVAsmParser::resetToArch(StringRef Arch, SMLoc Loc, std::string &Result, bool FromOptionDirective) { - for (auto Feature : RISCVFeatureKV) + for (auto &Feature : RISCVFeatureKV) if (llvm::RISCVISAInfo::isSupportedExtensionFeature(Feature.Key)) clearFeatureBits(Feature.Value, Feature.Key); @@ -2735,7 +2735,7 @@ bool RISCVAsmParser::resetToArch(StringRef Arch, SMLoc Loc, std::string &Result, } auto &ISAInfo = *ParseResult; - for (auto Feature : RISCVFeatureKV) + for (auto &Feature : RISCVFeatureKV) if (ISAInfo->hasExtension(Feature.Key)) setFeatureBits(Feature.Value, Feature.Key); @@ -2823,9 +2823,8 @@ bool RISCVAsmParser::parseDirectiveOption() { break; } - ArrayRef<SubtargetFeatureKV> KVArray(RISCVFeatureKV); - auto Ext = llvm::lower_bound(KVArray, Arch); - if (Ext == KVArray.end() || StringRef(Ext->Key) != Arch || + auto Ext = llvm::lower_bound(RISCVFeatureKV, Arch); + if (Ext == std::end(RISCVFeatureKV) || StringRef(Ext->Key) != Arch || !RISCVISAInfo::isSupportedExtension(Arch)) { if (isDigit(Arch.back())) return Error( @@ -2858,7 +2857,7 @@ bool RISCVAsmParser::parseDirectiveOption() { // 
It is invalid to disable an extension that there are other enabled // extensions depend on it. // TODO: Make use of RISCVISAInfo to handle this - for (auto Feature : KVArray) { + for (auto &Feature : RISCVFeatureKV) { if (getSTI().hasFeature(Feature.Value) && Feature.Implies.test(Ext->Value)) return Error(Loc, @@ -3271,11 +3270,13 @@ void RISCVAsmParser::emitVMSGE(MCInst &Inst, unsigned Opcode, SMLoc IDLoc, .addOperand(Inst.getOperand(0)) .addOperand(Inst.getOperand(1)) .addOperand(Inst.getOperand(2)) - .addReg(RISCV::NoRegister)); + .addReg(RISCV::NoRegister) + .setLoc(IDLoc)); emitToStreamer(Out, MCInstBuilder(RISCV::VMNAND_MM) .addOperand(Inst.getOperand(0)) .addOperand(Inst.getOperand(0)) - .addOperand(Inst.getOperand(0))); + .addOperand(Inst.getOperand(0)) + .setLoc(IDLoc)); } else if (Inst.getNumOperands() == 4) { // masked va >= x, vd != v0 // @@ -3287,11 +3288,13 @@ void RISCVAsmParser::emitVMSGE(MCInst &Inst, unsigned Opcode, SMLoc IDLoc, .addOperand(Inst.getOperand(0)) .addOperand(Inst.getOperand(1)) .addOperand(Inst.getOperand(2)) - .addOperand(Inst.getOperand(3))); + .addOperand(Inst.getOperand(3)) + .setLoc(IDLoc)); emitToStreamer(Out, MCInstBuilder(RISCV::VMXOR_MM) .addOperand(Inst.getOperand(0)) .addOperand(Inst.getOperand(0)) - .addReg(RISCV::V0)); + .addReg(RISCV::V0) + .setLoc(IDLoc)); } else if (Inst.getNumOperands() == 5 && Inst.getOperand(0).getReg() == RISCV::V0) { // masked va >= x, vd == v0 @@ -3306,11 +3309,13 @@ void RISCVAsmParser::emitVMSGE(MCInst &Inst, unsigned Opcode, SMLoc IDLoc, .addOperand(Inst.getOperand(1)) .addOperand(Inst.getOperand(2)) .addOperand(Inst.getOperand(3)) - .addReg(RISCV::NoRegister)); + .addReg(RISCV::NoRegister) + .setLoc(IDLoc)); emitToStreamer(Out, MCInstBuilder(RISCV::VMANDN_MM) .addOperand(Inst.getOperand(0)) .addOperand(Inst.getOperand(0)) - .addOperand(Inst.getOperand(1))); + .addOperand(Inst.getOperand(1)) + .setLoc(IDLoc)); } else if (Inst.getNumOperands() == 5) { // masked va >= x, any vd // @@ -3323,19 
+3328,23 @@ void RISCVAsmParser::emitVMSGE(MCInst &Inst, unsigned Opcode, SMLoc IDLoc, .addOperand(Inst.getOperand(1)) .addOperand(Inst.getOperand(2)) .addOperand(Inst.getOperand(3)) - .addReg(RISCV::NoRegister)); + .addReg(RISCV::NoRegister) + .setLoc(IDLoc)); emitToStreamer(Out, MCInstBuilder(RISCV::VMANDN_MM) .addOperand(Inst.getOperand(1)) .addReg(RISCV::V0) - .addOperand(Inst.getOperand(1))); + .addOperand(Inst.getOperand(1)) + .setLoc(IDLoc)); emitToStreamer(Out, MCInstBuilder(RISCV::VMANDN_MM) .addOperand(Inst.getOperand(0)) .addOperand(Inst.getOperand(0)) - .addReg(RISCV::V0)); + .addReg(RISCV::V0) + .setLoc(IDLoc)); emitToStreamer(Out, MCInstBuilder(RISCV::VMOR_MM) .addOperand(Inst.getOperand(0)) .addOperand(Inst.getOperand(1)) - .addOperand(Inst.getOperand(0))); + .addOperand(Inst.getOperand(0)) + .setLoc(IDLoc)); } } @@ -3637,7 +3646,8 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, .addOperand(Inst.getOperand(0)) .addOperand(Inst.getOperand(1)) .addImm(Imm - 1) - .addOperand(Inst.getOperand(3))); + .addOperand(Inst.getOperand(3)) + .setLoc(IDLoc)); return false; } case RISCV::PseudoVMSGEU_VI: @@ -3655,7 +3665,8 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, .addOperand(Inst.getOperand(0)) .addOperand(Inst.getOperand(1)) .addOperand(Inst.getOperand(1)) - .addOperand(Inst.getOperand(3))); + .addOperand(Inst.getOperand(3)) + .setLoc(IDLoc)); } else { // Other immediate values can subtract one like signed. 
unsigned Opc = Inst.getOpcode() == RISCV::PseudoVMSGEU_VI @@ -3665,7 +3676,8 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, .addOperand(Inst.getOperand(0)) .addOperand(Inst.getOperand(1)) .addImm(Imm - 1) - .addOperand(Inst.getOperand(3))); + .addOperand(Inst.getOperand(3)) + .setLoc(IDLoc)); } return false; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 08678a8..803774f 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -10466,6 +10466,7 @@ SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op, SDValue BasePtr = MemSD->getBasePtr(); SDValue Val, Mask, VL; + bool IsCompressingStore = false; if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) { Val = VPStore->getValue(); Mask = VPStore->getMask(); @@ -10474,9 +10475,11 @@ SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op, const auto *MStore = cast<MaskedStoreSDNode>(Op); Val = MStore->getValue(); Mask = MStore->getMask(); + IsCompressingStore = MStore->isCompressingStore(); } - bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); + bool IsUnmasked = + ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore; MVT VT = Val.getSimpleValueType(); MVT XLenVT = Subtarget.getXLenVT(); @@ -10486,7 +10489,7 @@ SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op, ContainerVT = getContainerForFixedLengthVector(VT); Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget); - if (!IsUnmasked) { + if (!IsUnmasked || IsCompressingStore) { MVT MaskVT = getMaskTypeFor(ContainerVT); Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); } @@ -10495,6 +10498,15 @@ SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op, if (!VL) VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; + if (IsCompressingStore) { + Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT, + DAG.getConstant(Intrinsic::riscv_vcompress, DL, 
XLenVT), + DAG.getUNDEF(ContainerVT), Val, Mask, VL); + VL = + DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask, + getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL); + } + unsigned IntID = IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask; SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)}; diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index ecd3736..8f46fdc 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -1620,3 +1620,13 @@ bool RISCVTTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1, C2.NumIVMuls, C2.NumBaseAdds, C2.ScaleCost, C2.ImmCost, C2.SetupCost); } + +bool RISCVTTIImpl::isLegalMaskedCompressStore(Type *DataTy, Align Alignment) { + auto *VTy = dyn_cast<VectorType>(DataTy); + if (!VTy || VTy->isScalableTy()) + return false; + + if (!isLegalMaskedLoadStore(DataTy, Alignment)) + return false; + return true; +} diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index af36e9d..8daf684 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -261,6 +261,8 @@ public: return TLI->isLegalStridedLoadStore(DataTypeVT, Alignment); } + bool isLegalMaskedCompressStore(Type *DataTy, Align Alignment); + bool isVScaleKnownToBeAPowerOfTwo() const { return TLI->isVScaleKnownToBeAPowerOfTwo(); } diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp index 2d7a00b..f1fbe2b 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp @@ -85,6 +85,42 @@ static ConstantInt *getConstInt(MDNode *MD, unsigned NumOp) { return nullptr; } +// If the function has pointer arguments, we are forced to re-create this +// function type from the very beginning, changing 
PointerType by +// TypedPointerType for each pointer argument. Otherwise, the same `Type*` +// potentially corresponds to different SPIR-V function type, effectively +// invalidating logic behind global registry and duplicates tracker. +static FunctionType * +fixFunctionTypeIfPtrArgs(SPIRVGlobalRegistry *GR, const Function &F, + FunctionType *FTy, const SPIRVType *SRetTy, + const SmallVector<SPIRVType *, 4> &SArgTys) { + if (F.getParent()->getNamedMetadata("spv.cloned_funcs")) + return FTy; + + bool hasArgPtrs = false; + for (auto &Arg : F.args()) { + // check if it's an instance of a non-typed PointerType + if (Arg.getType()->isPointerTy()) { + hasArgPtrs = true; + break; + } + } + if (!hasArgPtrs) { + Type *RetTy = FTy->getReturnType(); + // check if it's an instance of a non-typed PointerType + if (!RetTy->isPointerTy()) + return FTy; + } + + // re-create function type, using TypedPointerType instead of PointerType to + // properly trace argument types + const Type *RetTy = GR->getTypeForSPIRVType(SRetTy); + SmallVector<Type *, 4> ArgTys; + for (auto SArgTy : SArgTys) + ArgTys.push_back(const_cast<Type *>(GR->getTypeForSPIRVType(SArgTy))); + return FunctionType::get(const_cast<Type *>(RetTy), ArgTys, false); +} + // This code restores function args/retvalue types for composite cases // because the final types should still be aggregate whereas they're i32 // during the translation to cope with aggregate flattening etc. @@ -162,7 +198,7 @@ static SPIRVType *getArgSPIRVType(const Function &F, unsigned ArgIdx, // If OriginalArgType is non-pointer, use the OriginalArgType (the type cannot // be legally reassigned later). 
- if (!OriginalArgType->isPointerTy()) + if (!isPointerTy(OriginalArgType)) return GR->getOrCreateSPIRVType(OriginalArgType, MIRBuilder, ArgAccessQual); // In case OriginalArgType is of pointer type, there are three possibilities: @@ -179,8 +215,7 @@ static SPIRVType *getArgSPIRVType(const Function &F, unsigned ArgIdx, SPIRVType *ElementType = GR->getOrCreateSPIRVType(ByValRefType, MIRBuilder); return GR->getOrCreateSPIRVPointerType( ElementType, MIRBuilder, - addressSpaceToStorageClass(Arg->getType()->getPointerAddressSpace(), - ST)); + addressSpaceToStorageClass(getPointerAddressSpace(Arg->getType()), ST)); } for (auto User : Arg->users()) { @@ -240,7 +275,6 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, static_cast<const SPIRVSubtarget *>(&MIRBuilder.getMF().getSubtarget()); // Assign types and names to all args, and store their types for later. - FunctionType *FTy = getOriginalFunctionType(F); SmallVector<SPIRVType *, 4> ArgTypeVRegs; if (VRegs.size() > 0) { unsigned i = 0; @@ -255,7 +289,7 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, if (Arg.hasName()) buildOpName(VRegs[i][0], Arg.getName(), MIRBuilder); - if (Arg.getType()->isPointerTy()) { + if (isPointerTy(Arg.getType())) { auto DerefBytes = static_cast<unsigned>(Arg.getDereferenceableBytes()); if (DerefBytes != 0) buildOpDecorate(VRegs[i][0], MIRBuilder, @@ -322,7 +356,9 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, MRI->setRegClass(FuncVReg, &SPIRV::IDRegClass); if (F.isDeclaration()) GR->add(&F, &MIRBuilder.getMF(), FuncVReg); + FunctionType *FTy = getOriginalFunctionType(F); SPIRVType *RetTy = GR->getOrCreateSPIRVType(FTy->getReturnType(), MIRBuilder); + FTy = fixFunctionTypeIfPtrArgs(GR, F, FTy, RetTy, ArgTypeVRegs); SPIRVType *FuncTy = GR->getOrCreateOpTypeFunctionWithArgs( FTy, RetTy, ArgTypeVRegs, MIRBuilder); uint32_t FuncControl = getFunctionControl(F); @@ -429,7 +465,6 @@ bool 
SPIRVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, return false; MachineFunction &MF = MIRBuilder.getMF(); GR->setCurrentFunc(MF); - FunctionType *FTy = nullptr; const Function *CF = nullptr; std::string DemangledName; const Type *OrigRetTy = Info.OrigRet.Ty; @@ -444,7 +479,7 @@ bool SPIRVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, // TODO: support constexpr casts and indirect calls. if (CF == nullptr) return false; - if ((FTy = getOriginalFunctionType(*CF)) != nullptr) + if (FunctionType *FTy = getOriginalFunctionType(*CF)) OrigRetTy = FTy->getReturnType(); } diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp index 575e903..c5b9012 100644 --- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp @@ -57,8 +57,14 @@ class SPIRVEmitIntrinsics bool TrackConstants = true; DenseMap<Instruction *, Constant *> AggrConsts; DenseSet<Instruction *> AggrStores; + + // deduce values type + DenseMap<Value *, Type *> DeducedElTys; + Type *deduceElementType(Value *I); + void preprocessCompositeConstants(IRBuilder<> &B); void preprocessUndefs(IRBuilder<> &B); + CallInst *buildIntrWithMD(Intrinsic::ID IntrID, ArrayRef<Type *> Types, Value *Arg, Value *Arg2, ArrayRef<Constant *> Imms, IRBuilder<> &B) { @@ -72,6 +78,7 @@ class SPIRVEmitIntrinsics Args.push_back(Imm); return B.CreateIntrinsic(IntrID, {Types}, Args); } + void replaceMemInstrUses(Instruction *Old, Instruction *New, IRBuilder<> &B); void processInstrAfterVisit(Instruction *I, IRBuilder<> &B); void insertAssignPtrTypeIntrs(Instruction *I, IRBuilder<> &B); @@ -156,6 +163,48 @@ static inline void reportFatalOnTokenType(const Instruction *I) { false); } +// Deduce and return a successfully deduced Type of the Instruction, +// or nullptr otherwise. 
+static Type *deduceElementTypeHelper(Value *I, + std::unordered_set<Value *> &Visited, + DenseMap<Value *, Type *> &DeducedElTys) { + // maybe already known + auto It = DeducedElTys.find(I); + if (It != DeducedElTys.end()) + return It->second; + + // maybe a cycle + if (Visited.find(I) != Visited.end()) + return nullptr; + Visited.insert(I); + + // fallback value in case when we fail to deduce a type + Type *Ty = nullptr; + // look for known basic patterns of type inference + if (auto *Ref = dyn_cast<AllocaInst>(I)) + Ty = Ref->getAllocatedType(); + else if (auto *Ref = dyn_cast<GetElementPtrInst>(I)) + Ty = Ref->getResultElementType(); + else if (auto *Ref = dyn_cast<GlobalValue>(I)) + Ty = Ref->getValueType(); + else if (auto *Ref = dyn_cast<AddrSpaceCastInst>(I)) + Ty = deduceElementTypeHelper(Ref->getPointerOperand(), Visited, + DeducedElTys); + + // remember the found relationship + if (Ty) + DeducedElTys[I] = Ty; + + return Ty; +} + +Type *SPIRVEmitIntrinsics::deduceElementType(Value *I) { + std::unordered_set<Value *> Visited; + if (Type *Ty = deduceElementTypeHelper(I, Visited, DeducedElTys)) + return Ty; + return IntegerType::getInt8Ty(I->getContext()); +} + void SPIRVEmitIntrinsics::replaceMemInstrUses(Instruction *Old, Instruction *New, IRBuilder<> &B) { @@ -280,7 +329,7 @@ Instruction *SPIRVEmitIntrinsics::visitBitCastInst(BitCastInst &I) { // varying element types. In case of IR coming from older versions of LLVM // such bitcasts do not provide sufficient information, should be just skipped // here, and handled in insertPtrCastOrAssignTypeInstr. - if (I.getType()->isPointerTy()) { + if (isPointerTy(I.getType())) { I.replaceAllUsesWith(Source); I.eraseFromParent(); return nullptr; @@ -333,20 +382,10 @@ void SPIRVEmitIntrinsics::replacePointerOperandWithPtrCast( while (BitCastInst *BC = dyn_cast<BitCastInst>(Pointer)) Pointer = BC->getOperand(0); - // Do not emit spv_ptrcast if Pointer is a GlobalValue of expected type. 
- GlobalValue *GV = dyn_cast<GlobalValue>(Pointer); - if (GV && GV->getValueType() == ExpectedElementType) - return; - - // Do not emit spv_ptrcast if Pointer is a result of alloca with expected - // type. - AllocaInst *A = dyn_cast<AllocaInst>(Pointer); - if (A && A->getAllocatedType() == ExpectedElementType) - return; - - // Do not emit spv_ptrcast if Pointer is a result of GEP of expected type. - GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Pointer); - if (GEPI && GEPI->getResultElementType() == ExpectedElementType) + // Do not emit spv_ptrcast if Pointer's element type is ExpectedElementType + std::unordered_set<Value *> Visited; + Type *PointerElemTy = deduceElementTypeHelper(Pointer, Visited, DeducedElTys); + if (PointerElemTy == ExpectedElementType) return; setInsertPointSkippingPhis(B, I); @@ -356,7 +395,7 @@ void SPIRVEmitIntrinsics::replacePointerOperandWithPtrCast( ValueAsMetadata::getConstant(ExpectedElementTypeConst); MDTuple *TyMD = MDNode::get(F->getContext(), CM); MetadataAsValue *VMD = MetadataAsValue::get(F->getContext(), TyMD); - unsigned AddressSpace = Pointer->getType()->getPointerAddressSpace(); + unsigned AddressSpace = getPointerAddressSpace(Pointer->getType()); bool FirstPtrCastOrAssignPtrType = true; // Do not emit new spv_ptrcast if equivalent one already exists or when @@ -401,9 +440,11 @@ void SPIRVEmitIntrinsics::replacePointerOperandWithPtrCast( // spv_assign_ptr_type instead. 
if (FirstPtrCastOrAssignPtrType && (isa<Instruction>(Pointer) || isa<Argument>(Pointer))) { - buildIntrWithMD(Intrinsic::spv_assign_ptr_type, {Pointer->getType()}, - ExpectedElementTypeConst, Pointer, - {B.getInt32(AddressSpace)}, B); + CallInst *CI = buildIntrWithMD( + Intrinsic::spv_assign_ptr_type, {Pointer->getType()}, + ExpectedElementTypeConst, Pointer, {B.getInt32(AddressSpace)}, B); + DeducedElTys[CI] = ExpectedElementType; + DeducedElTys[Pointer] = ExpectedElementType; return; } @@ -419,7 +460,7 @@ void SPIRVEmitIntrinsics::insertPtrCastOrAssignTypeInstr(Instruction *I, // Handle basic instructions: StoreInst *SI = dyn_cast<StoreInst>(I); if (SI && F->getCallingConv() == CallingConv::SPIR_KERNEL && - SI->getValueOperand()->getType()->isPointerTy() && + isPointerTy(SI->getValueOperand()->getType()) && isa<Argument>(SI->getValueOperand())) { return replacePointerOperandWithPtrCast( I, SI->getValueOperand(), IntegerType::getInt8Ty(F->getContext()), 0, @@ -440,9 +481,34 @@ void SPIRVEmitIntrinsics::insertPtrCastOrAssignTypeInstr(Instruction *I, if (!CI || CI->isIndirectCall() || CI->getCalledFunction()->isIntrinsic()) return; + // collect information about formal parameter types + Function *CalledF = CI->getCalledFunction(); + SmallVector<Type *, 4> CalledArgTys; + bool HaveTypes = false; + for (auto &CalledArg : CalledF->args()) { + if (!isPointerTy(CalledArg.getType())) { + CalledArgTys.push_back(nullptr); + continue; + } + auto It = DeducedElTys.find(&CalledArg); + Type *ParamTy = It != DeducedElTys.end() ? 
It->second : nullptr; + if (!ParamTy) { + for (User *U : CalledArg.users()) { + if (Instruction *Inst = dyn_cast<Instruction>(U)) { + std::unordered_set<Value *> Visited; + ParamTy = deduceElementTypeHelper(Inst, Visited, DeducedElTys); + if (ParamTy) + break; + } + } + } + HaveTypes |= ParamTy != nullptr; + CalledArgTys.push_back(ParamTy); + } + std::string DemangledName = getOclOrSpirvBuiltinDemangledName(CI->getCalledFunction()->getName()); - if (DemangledName.empty()) + if (DemangledName.empty() && !HaveTypes) return; for (unsigned OpIdx = 0; OpIdx < CI->arg_size(); OpIdx++) { @@ -455,8 +521,11 @@ void SPIRVEmitIntrinsics::insertPtrCastOrAssignTypeInstr(Instruction *I, if (!isa<Instruction>(ArgOperand) && !isa<Argument>(ArgOperand)) continue; - Type *ExpectedType = SPIRV::parseBuiltinCallArgumentBaseType( - DemangledName, OpIdx, I->getContext()); + Type *ExpectedType = + OpIdx < CalledArgTys.size() ? CalledArgTys[OpIdx] : nullptr; + if (!ExpectedType && !DemangledName.empty()) + ExpectedType = SPIRV::parseBuiltinCallArgumentBaseType( + DemangledName, OpIdx, I->getContext()); if (!ExpectedType) continue; @@ -639,30 +708,25 @@ void SPIRVEmitIntrinsics::processGlobalValue(GlobalVariable &GV, void SPIRVEmitIntrinsics::insertAssignPtrTypeIntrs(Instruction *I, IRBuilder<> &B) { reportFatalOnTokenType(I); - if (!I->getType()->isPointerTy() || !requireAssignType(I) || + if (!isPointerTy(I->getType()) || !requireAssignType(I) || isa<BitCastInst>(I)) return; setInsertPointSkippingPhis(B, I->getNextNode()); - Constant *EltTyConst; - unsigned AddressSpace = I->getType()->getPointerAddressSpace(); - if (auto *AI = dyn_cast<AllocaInst>(I)) - EltTyConst = UndefValue::get(AI->getAllocatedType()); - else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) - EltTyConst = UndefValue::get(GEP->getResultElementType()); - else - EltTyConst = UndefValue::get(IntegerType::getInt8Ty(I->getContext())); - - buildIntrWithMD(Intrinsic::spv_assign_ptr_type, {I->getType()}, EltTyConst, I, - 
{B.getInt32(AddressSpace)}, B); + Type *ElemTy = deduceElementType(I); + Constant *EltTyConst = UndefValue::get(ElemTy); + unsigned AddressSpace = getPointerAddressSpace(I->getType()); + CallInst *CI = buildIntrWithMD(Intrinsic::spv_assign_ptr_type, {I->getType()}, + EltTyConst, I, {B.getInt32(AddressSpace)}, B); + DeducedElTys[CI] = ElemTy; } void SPIRVEmitIntrinsics::insertAssignTypeIntrs(Instruction *I, IRBuilder<> &B) { reportFatalOnTokenType(I); Type *Ty = I->getType(); - if (!Ty->isVoidTy() && !Ty->isPointerTy() && requireAssignType(I)) { + if (!Ty->isVoidTy() && !isPointerTy(Ty) && requireAssignType(I)) { setInsertPointSkippingPhis(B, I->getNextNode()); Type *TypeToAssign = Ty; if (auto *II = dyn_cast<IntrinsicInst>(I)) { diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp index 8556581..bda9c57 100644 --- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp @@ -750,7 +750,7 @@ SPIRVType *SPIRVGlobalRegistry::createSPIRVType( SPIRVType *SPIRVGlobalRegistry::restOfCreateSPIRVType( const Type *Ty, MachineIRBuilder &MIRBuilder, SPIRV::AccessQualifier::AccessQualifier AccessQual, bool EmitIR) { - if (TypesInProcessing.count(Ty) && !Ty->isPointerTy()) + if (TypesInProcessing.count(Ty) && !isPointerTy(Ty)) return nullptr; TypesInProcessing.insert(Ty); SPIRVType *SpirvType = createSPIRVType(Ty, MIRBuilder, AccessQual, EmitIR); @@ -762,11 +762,15 @@ SPIRVType *SPIRVGlobalRegistry::restOfCreateSPIRVType( // will be added later. For special types it is already added to DT. 
if (SpirvType->getOpcode() != SPIRV::OpTypeForwardPointer && !Reg.isValid() && !isSpecialOpaqueType(Ty)) { - if (!Ty->isPointerTy()) + if (!isPointerTy(Ty)) DT.add(Ty, &MIRBuilder.getMF(), getSPIRVTypeID(SpirvType)); + else if (isTypedPointerTy(Ty)) + DT.add(cast<TypedPointerType>(Ty)->getElementType(), + getPointerAddressSpace(Ty), &MIRBuilder.getMF(), + getSPIRVTypeID(SpirvType)); else DT.add(Type::getInt8Ty(MIRBuilder.getMF().getFunction().getContext()), - Ty->getPointerAddressSpace(), &MIRBuilder.getMF(), + getPointerAddressSpace(Ty), &MIRBuilder.getMF(), getSPIRVTypeID(SpirvType)); } @@ -787,12 +791,15 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVType( const Type *Ty, MachineIRBuilder &MIRBuilder, SPIRV::AccessQualifier::AccessQualifier AccessQual, bool EmitIR) { Register Reg; - if (!Ty->isPointerTy()) + if (!isPointerTy(Ty)) Reg = DT.find(Ty, &MIRBuilder.getMF()); + else if (isTypedPointerTy(Ty)) + Reg = DT.find(cast<TypedPointerType>(Ty)->getElementType(), + getPointerAddressSpace(Ty), &MIRBuilder.getMF()); else Reg = DT.find(Type::getInt8Ty(MIRBuilder.getMF().getFunction().getContext()), - Ty->getPointerAddressSpace(), &MIRBuilder.getMF()); + getPointerAddressSpace(Ty), &MIRBuilder.getMF()); if (Reg.isValid() && !isSpecialOpaqueType(Ty)) return getSPIRVTypeForVReg(Reg); @@ -836,11 +843,16 @@ bool SPIRVGlobalRegistry::isScalarOrVectorOfType(Register VReg, unsigned SPIRVGlobalRegistry::getScalarOrVectorComponentCount(Register VReg) const { - if (SPIRVType *Type = getSPIRVTypeForVReg(VReg)) - return Type->getOpcode() == SPIRV::OpTypeVector - ? static_cast<unsigned>(Type->getOperand(2).getImm()) - : 1; - return 0; + return getScalarOrVectorComponentCount(getSPIRVTypeForVReg(VReg)); +} + +unsigned +SPIRVGlobalRegistry::getScalarOrVectorComponentCount(SPIRVType *Type) const { + if (!Type) + return 0; + return Type->getOpcode() == SPIRV::OpTypeVector + ? 
static_cast<unsigned>(Type->getOperand(2).getImm()) + : 1; } unsigned diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h index 9c0061d..25d82ebf 100644 --- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h +++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h @@ -198,9 +198,10 @@ public: // opcode (e.g. OpTypeBool, or OpTypeVector %x 4, where %x is OpTypeBool). bool isScalarOrVectorOfType(Register VReg, unsigned TypeOpcode) const; - // Return number of elements in a vector if the given VReg is associated with + // Return number of elements in a vector if the argument is associated with // a vector type. Return 1 for a scalar type, and 0 for a missing type. unsigned getScalarOrVectorComponentCount(Register VReg) const; + unsigned getScalarOrVectorComponentCount(SPIRVType *Type) const; // For vectors or scalars of booleans, integers and floats, return the scalar // type's bitwidth. Otherwise calls llvm_unreachable(). diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 74df8de..fd19b74 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -125,6 +125,8 @@ private: bool selectConstVector(Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const; + bool selectSplatVector(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I) const; bool selectCmp(Register ResVReg, const SPIRVType *ResType, unsigned comparisonOpcode, MachineInstr &I) const; @@ -313,6 +315,8 @@ bool SPIRVInstructionSelector::spvSelect(Register ResVReg, case TargetOpcode::G_BUILD_VECTOR: return selectConstVector(ResVReg, ResType, I); + case TargetOpcode::G_SPLAT_VECTOR: + return selectSplatVector(ResVReg, ResType, I); case TargetOpcode::G_SHUFFLE_VECTOR: { MachineBasicBlock &BB = *I.getParent(); @@ -1185,6 +1189,43 @@ bool SPIRVInstructionSelector::selectConstVector(Register ResVReg, return 
MIB.constrainAllUses(TII, TRI, RBI); } +bool SPIRVInstructionSelector::selectSplatVector(Register ResVReg, + const SPIRVType *ResType, + MachineInstr &I) const { + if (ResType->getOpcode() != SPIRV::OpTypeVector) + report_fatal_error("Cannot select G_SPLAT_VECTOR with a non-vector result"); + unsigned N = GR.getScalarOrVectorComponentCount(ResType); + unsigned OpIdx = I.getNumExplicitDefs(); + if (!I.getOperand(OpIdx).isReg()) + report_fatal_error("Unexpected argument in G_SPLAT_VECTOR"); + + // check if we may construct a constant vector + Register OpReg = I.getOperand(OpIdx).getReg(); + bool IsConst = false; + if (SPIRVType *OpDef = MRI->getVRegDef(OpReg)) { + if (OpDef->getOpcode() == SPIRV::ASSIGN_TYPE && + OpDef->getOperand(1).isReg()) { + if (SPIRVType *RefDef = MRI->getVRegDef(OpDef->getOperand(1).getReg())) + OpDef = RefDef; + } + IsConst = OpDef->getOpcode() == TargetOpcode::G_CONSTANT || + OpDef->getOpcode() == TargetOpcode::G_FCONSTANT; + } + + if (!IsConst && N < 2) + report_fatal_error( + "There must be at least two constituent operands in a vector"); + + auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), + TII.get(IsConst ? SPIRV::OpConstantComposite + : SPIRV::OpCompositeConstruct)) + .addDef(ResVReg) + .addUse(GR.getSPIRVTypeID(ResType)); + for (unsigned i = 0; i < N; ++i) + MIB.addUse(OpReg); + return MIB.constrainAllUses(TII, TRI, RBI); +} + bool SPIRVInstructionSelector::selectCmp(Register ResVReg, const SPIRVType *ResType, unsigned CmpOpc, diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp index f815487..4b871bd 100644 --- a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp @@ -149,7 +149,9 @@ SPIRVLegalizerInfo::SPIRVLegalizerInfo(const SPIRVSubtarget &ST) { getActionDefinitionsBuilder(G_GLOBAL_VALUE).alwaysLegal(); // TODO: add proper rules for vectors legalization. 
- getActionDefinitionsBuilder({G_BUILD_VECTOR, G_SHUFFLE_VECTOR}).alwaysLegal(); + getActionDefinitionsBuilder( + {G_BUILD_VECTOR, G_SHUFFLE_VECTOR, G_SPLAT_VECTOR}) + .alwaysLegal(); // Vector Reduction Operations getActionDefinitionsBuilder( diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.h b/llvm/lib/Target/SPIRV/SPIRVUtils.h index e5f35aa..d5ed501 100644 --- a/llvm/lib/Target/SPIRV/SPIRVUtils.h +++ b/llvm/lib/Target/SPIRV/SPIRVUtils.h @@ -15,6 +15,7 @@ #include "MCTargetDesc/SPIRVBaseInfo.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/TypedPointerType.h" #include <string> namespace llvm { @@ -100,5 +101,30 @@ bool isEntryPoint(const Function &F); // Parse basic scalar type name, substring TypeName, and return LLVM type. Type *parseBasicTypeName(StringRef TypeName, LLVMContext &Ctx); + +// True if this is an instance of TypedPointerType. +inline bool isTypedPointerTy(const Type *T) { + return T->getTypeID() == Type::TypedPointerTyID; +} + +// True if this is an instance of PointerType. +inline bool isUntypedPointerTy(const Type *T) { + return T->getTypeID() == Type::PointerTyID; +} + +// True if this is an instance of PointerType or TypedPointerType. +inline bool isPointerTy(const Type *T) { + return isUntypedPointerTy(T) || isTypedPointerTy(T); +} + +// Get the address space of this pointer or pointer vector type for instances of +// PointerType or TypedPointerType. +inline unsigned getPointerAddressSpace(const Type *T) { + Type *SubT = T->getScalarType(); + return SubT->getTypeID() == Type::PointerTyID + ? 
cast<PointerType>(SubT)->getAddressSpace() + : cast<TypedPointerType>(SubT)->getAddressSpace(); +} + } // namespace llvm #endif // LLVM_LIB_TARGET_SPIRV_SPIRVUTILS_H diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index ee4fd04..f65ed25 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -196,14 +196,24 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) { .Case("0xb36", "arm1136j-s") .Case("0xb56", "arm1156t2-s") .Case("0xb76", "arm1176jz-s") + .Case("0xc05", "cortex-a5") + .Case("0xc07", "cortex-a7") .Case("0xc08", "cortex-a8") .Case("0xc09", "cortex-a9") .Case("0xc0f", "cortex-a15") + .Case("0xc0e", "cortex-a17") .Case("0xc20", "cortex-m0") .Case("0xc23", "cortex-m3") .Case("0xc24", "cortex-m4") + .Case("0xc27", "cortex-m7") + .Case("0xd20", "cortex-m23") + .Case("0xd21", "cortex-m33") .Case("0xd24", "cortex-m52") .Case("0xd22", "cortex-m55") + .Case("0xd23", "cortex-m85") + .Case("0xc18", "cortex-r8") + .Case("0xd13", "cortex-r52") + .Case("0xd15", "cortex-r82") .Case("0xd02", "cortex-a34") .Case("0xd04", "cortex-a35") .Case("0xd03", "cortex-a53") @@ -211,13 +221,17 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) { .Case("0xd46", "cortex-a510") .Case("0xd80", "cortex-a520") .Case("0xd07", "cortex-a57") + .Case("0xd06", "cortex-a65") + .Case("0xd43", "cortex-a65ae") .Case("0xd08", "cortex-a72") .Case("0xd09", "cortex-a73") .Case("0xd0a", "cortex-a75") .Case("0xd0b", "cortex-a76") + .Case("0xd0e", "cortex-a76ae") .Case("0xd0d", "cortex-a77") .Case("0xd41", "cortex-a78") .Case("0xd42", "cortex-a78ae") + .Case("0xd4b", "cortex-a78c") .Case("0xd47", "cortex-a710") .Case("0xd4d", "cortex-a715") .Case("0xd81", "cortex-a720") @@ -226,6 +240,7 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) { .Case("0xd48", "cortex-x2") .Case("0xd4e", "cortex-x3") .Case("0xd82", "cortex-x4") + .Case("0xd4a", "neoverse-e1") .Case("0xd0c", 
"neoverse-n1") .Case("0xd49", "neoverse-n2") .Case("0xd40", "neoverse-v1") diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 488a6f0..f98833b 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -12371,7 +12371,7 @@ struct AAIndirectCallInfoCallSite : public AAIndirectCallInfo { SplitBlockAndInsertIfThen(LastCmp, IP, /* Unreachable */ false); BasicBlock *CBBB = CB->getParent(); A.registerManifestAddedBasicBlock(*ThenTI->getParent()); - A.registerManifestAddedBasicBlock(*CBBB); + A.registerManifestAddedBasicBlock(*IP->getParent()); auto *SplitTI = cast<BranchInst>(LastCmp->getNextNode()); BasicBlock *ElseBB; if (&*IP == CB) { diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index f5f3716..694b180 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -504,6 +504,11 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) { return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(II.getType())); } + // If ctlz/cttz is only used as a shift amount, set is_zero_poison to true. 
+ if (II.hasOneUse() && match(Op1, m_Zero()) && + match(II.user_back(), m_Shift(m_Value(), m_Specific(&II)))) + return IC.replaceOperand(II, 1, IC.Builder.getTrue()); + Constant *C; if (IsTZ) { diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index 45afa63..a9817f1 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1121,6 +1121,10 @@ Instruction *InstCombinerImpl::visitZExt(ZExtInst &Zext) { Value *Src = Zext.getOperand(0); Type *SrcTy = Src->getType(), *DestTy = Zext.getType(); + // zext nneg bool x -> 0 + if (SrcTy->isIntOrIntVectorTy(1) && Zext.hasNonNeg()) + return replaceInstUsesWith(Zext, Constant::getNullValue(Zext.getType())); + // Try to extend the entire expression tree to the wide destination type. unsigned BitsToClear; if (shouldChangeType(SrcTy, DestTy) && diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 1688005..c9bbe43 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -5202,7 +5202,8 @@ static bool combineInstructionsOverFunction( if (Iteration > Opts.MaxIterations) { report_fatal_error( "Instruction Combining did not reach a fixpoint after " + - Twine(Opts.MaxIterations) + " iterations"); + Twine(Opts.MaxIterations) + " iterations", + /*GenCrashDiag=*/false); } } diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index 11a5c29c..87584da 100644 --- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -260,6 +260,10 @@ static cl::opt<bool> ClUsePageAliases("hwasan-experimental-use-page-aliases", namespace { +template <typename T> T optOr(cl::opt<T> &Opt, T Other) { + return 
Opt.getNumOccurrences() ? Opt : Other; +} + bool shouldUsePageAliases(const Triple &TargetTriple) { return ClUsePageAliases && TargetTriple.getArch() == Triple::x86_64; } @@ -269,14 +273,11 @@ bool shouldInstrumentStack(const Triple &TargetTriple) { } bool shouldInstrumentWithCalls(const Triple &TargetTriple) { - return ClInstrumentWithCalls.getNumOccurrences() - ? ClInstrumentWithCalls - : TargetTriple.getArch() == Triple::x86_64; + return optOr(ClInstrumentWithCalls, TargetTriple.getArch() == Triple::x86_64); } bool mightUseStackSafetyAnalysis(bool DisableOptimization) { - return ClUseStackSafety.getNumOccurrences() ? ClUseStackSafety - : !DisableOptimization; + return optOr(ClUseStackSafety, !DisableOptimization); } bool shouldUseStackSafetyAnalysis(const Triple &TargetTriple, @@ -296,10 +297,8 @@ public: HWAddressSanitizer(Module &M, bool CompileKernel, bool Recover, const StackSafetyGlobalInfo *SSI) : M(M), SSI(SSI) { - this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover; - this->CompileKernel = ClEnableKhwasan.getNumOccurrences() > 0 - ? ClEnableKhwasan - : CompileKernel; + this->Recover = optOr(ClRecover, Recover); + this->CompileKernel = optOr(ClEnableKhwasan, CompileKernel); this->Rng = ClRandomSkipRate.getNumOccurrences() ? M.createRNG("hwasan") : nullptr; @@ -625,19 +624,14 @@ void HWAddressSanitizer::initializeModule() { bool NewRuntime = !TargetTriple.isAndroid() || !TargetTriple.isAndroidVersionLT(30); - UseShortGranules = - ClUseShortGranules.getNumOccurrences() ? ClUseShortGranules : NewRuntime; - OutlinedChecks = - (TargetTriple.isAArch64() || TargetTriple.isRISCV64()) && - TargetTriple.isOSBinFormatELF() && - (ClInlineAllChecks.getNumOccurrences() ? 
!ClInlineAllChecks : !Recover); + UseShortGranules = optOr(ClUseShortGranules, NewRuntime); + OutlinedChecks = (TargetTriple.isAArch64() || TargetTriple.isRISCV64()) && + TargetTriple.isOSBinFormatELF() && + !optOr(ClInlineAllChecks, Recover); - InlineFastPath = - (ClInlineFastPathChecks.getNumOccurrences() - ? ClInlineFastPathChecks - : !(TargetTriple.isAndroid() || - TargetTriple.isOSFuchsia())); // These platforms may prefer less - // inlining to reduce binary size. + // These platforms may prefer less inlining to reduce binary size. + InlineFastPath = optOr(ClInlineFastPathChecks, !(TargetTriple.isAndroid() || + TargetTriple.isOSFuchsia())); if (ClMatchAllTag.getNumOccurrences()) { if (ClMatchAllTag != -1) { @@ -649,22 +643,17 @@ void HWAddressSanitizer::initializeModule() { UseMatchAllCallback = !CompileKernel && MatchAllTag.has_value(); // If we don't have personality function support, fall back to landing pads. - InstrumentLandingPads = ClInstrumentLandingPads.getNumOccurrences() - ? ClInstrumentLandingPads - : !NewRuntime; + InstrumentLandingPads = optOr(ClInstrumentLandingPads, !NewRuntime); if (!CompileKernel) { createHwasanCtorComdat(); - bool InstrumentGlobals = - ClGlobals.getNumOccurrences() ? ClGlobals : NewRuntime; + bool InstrumentGlobals = optOr(ClGlobals, NewRuntime); if (InstrumentGlobals && !UsePageAliases) instrumentGlobals(); bool InstrumentPersonalityFunctions = - ClInstrumentPersonalityFunctions.getNumOccurrences() - ? ClInstrumentPersonalityFunctions - : NewRuntime; + optOr(ClInstrumentPersonalityFunctions, NewRuntime); if (InstrumentPersonalityFunctions) instrumentPersonalityFunctions(); } diff --git a/llvm/lib/Transforms/Scalar/Float2Int.cpp b/llvm/lib/Transforms/Scalar/Float2Int.cpp index ccca8bc..6ad4be169 100644 --- a/llvm/lib/Transforms/Scalar/Float2Int.cpp +++ b/llvm/lib/Transforms/Scalar/Float2Int.cpp @@ -311,7 +311,7 @@ void Float2IntPass::walkForwards() { } // If there is a valid transform to be done, do it. 
-bool Float2IntPass::validateAndTransform() { +bool Float2IntPass::validateAndTransform(const DataLayout &DL) { bool MadeChange = false; // Iterate over every disjoint partition of the def-use graph. @@ -376,15 +376,23 @@ bool Float2IntPass::validateAndTransform() { LLVM_DEBUG(dbgs() << "F2I: Value not guaranteed to be representable!\n"); continue; } - if (MinBW > 64) { - LLVM_DEBUG( - dbgs() << "F2I: Value requires more than 64 bits to represent!\n"); - continue; - } - // OK, R is known to be representable. Now pick a type for it. - // FIXME: Pick the smallest legal type that will fit. - Type *Ty = (MinBW > 32) ? Type::getInt64Ty(*Ctx) : Type::getInt32Ty(*Ctx); + // OK, R is known to be representable. + // Pick the smallest legal type that will fit. + Type *Ty = DL.getSmallestLegalIntType(*Ctx, MinBW); + if (!Ty) { + // Every supported target supports 64-bit and 32-bit integers, + // so fallback to a 32 or 64-bit integer if the value fits. + if (MinBW <= 32) { + Ty = Type::getInt32Ty(*Ctx); + } else if (MinBW <= 64) { + Ty = Type::getInt64Ty(*Ctx); + } else { + LLVM_DEBUG(dbgs() << "F2I: Value requires more than bits to represent " + "than the target supports!\n"); + continue; + } + } for (auto MI = ECs.member_begin(It), ME = ECs.member_end(); MI != ME; ++MI) @@ -491,7 +499,8 @@ bool Float2IntPass::runImpl(Function &F, const DominatorTree &DT) { walkBackwards(); walkForwards(); - bool Modified = validateAndTransform(); + const DataLayout &DL = F.getParent()->getDataLayout(); + bool Modified = validateAndTransform(DL); if (Modified) cleanup(); return Modified; diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 7b74caa..a87e5a3 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -2848,7 +2848,7 @@ unsigned llvm::changeToUnreachable(Instruction *I, bool PreserveLCSSA, Updates.push_back({DominatorTree::Delete, BB, UniqueSuccessor}); DTU->applyUpdates(Updates); } - 
BB->flushTerminatorDbgValues(); + BB->flushTerminatorDbgRecords(); return NumInstrsRemoved; } diff --git a/llvm/lib/Transforms/Utils/LoopConstrainer.cpp b/llvm/lib/Transforms/Utils/LoopConstrainer.cpp index 81545ef..d9832ee 100644 --- a/llvm/lib/Transforms/Utils/LoopConstrainer.cpp +++ b/llvm/lib/Transforms/Utils/LoopConstrainer.cpp @@ -42,8 +42,11 @@ static bool isSafeDecreasingBound(const SCEV *Start, const SCEV *BoundSCEV, ICmpInst::Predicate BoundPred = IsSigned ? CmpInst::ICMP_SGT : CmpInst::ICMP_UGT; + auto StartLG = SE.applyLoopGuards(Start, L); + auto BoundLG = SE.applyLoopGuards(BoundSCEV, L); + if (LatchBrExitIdx == 1) - return SE.isLoopEntryGuardedByCond(L, BoundPred, Start, BoundSCEV); + return SE.isLoopEntryGuardedByCond(L, BoundPred, StartLG, BoundLG); assert(LatchBrExitIdx == 0 && "LatchBrExitIdx should be either 0 or 1"); @@ -54,10 +57,10 @@ static bool isSafeDecreasingBound(const SCEV *Start, const SCEV *BoundSCEV, const SCEV *Limit = SE.getMinusSCEV(SE.getConstant(Min), StepPlusOne); const SCEV *MinusOne = - SE.getMinusSCEV(BoundSCEV, SE.getOne(BoundSCEV->getType())); + SE.getMinusSCEV(BoundLG, SE.getOne(BoundLG->getType())); - return SE.isLoopEntryGuardedByCond(L, BoundPred, Start, MinusOne) && - SE.isLoopEntryGuardedByCond(L, BoundPred, BoundSCEV, Limit); + return SE.isLoopEntryGuardedByCond(L, BoundPred, StartLG, MinusOne) && + SE.isLoopEntryGuardedByCond(L, BoundPred, BoundLG, Limit); } /// Given a loop with an increasing induction variable, is it possible to @@ -86,8 +89,11 @@ static bool isSafeIncreasingBound(const SCEV *Start, const SCEV *BoundSCEV, ICmpInst::Predicate BoundPred = IsSigned ? 
CmpInst::ICMP_SLT : CmpInst::ICMP_ULT; + auto StartLG = SE.applyLoopGuards(Start, L); + auto BoundLG = SE.applyLoopGuards(BoundSCEV, L); + if (LatchBrExitIdx == 1) - return SE.isLoopEntryGuardedByCond(L, BoundPred, Start, BoundSCEV); + return SE.isLoopEntryGuardedByCond(L, BoundPred, StartLG, BoundLG); assert(LatchBrExitIdx == 0 && "LatchBrExitIdx should be 0 or 1"); @@ -97,9 +103,9 @@ static bool isSafeIncreasingBound(const SCEV *Start, const SCEV *BoundSCEV, : APInt::getMaxValue(BitWidth); const SCEV *Limit = SE.getMinusSCEV(SE.getConstant(Max), StepMinusOne); - return (SE.isLoopEntryGuardedByCond(L, BoundPred, Start, - SE.getAddExpr(BoundSCEV, Step)) && - SE.isLoopEntryGuardedByCond(L, BoundPred, BoundSCEV, Limit)); + return (SE.isLoopEntryGuardedByCond(L, BoundPred, StartLG, + SE.getAddExpr(BoundLG, Step)) && + SE.isLoopEntryGuardedByCond(L, BoundPred, BoundLG, Limit)); } /// Returns estimate for max latch taken count of the loop of the narrowest diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp index 8c6af7a..acfd87c 100644 --- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp @@ -577,28 +577,28 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { Module *M = OrigHeader->getModule(); - // Track the next DPValue to clone. If we have a sequence where an + // Track the next DbgRecord to clone. 
If we have a sequence where an // instruction is hoisted instead of being cloned: - // DPValue blah + // DbgRecord blah // %foo = add i32 0, 0 - // DPValue xyzzy + // DbgRecord xyzzy // %bar = call i32 @foobar() - // where %foo is hoisted, then the DPValue "blah" will be seen twice, once + // where %foo is hoisted, then the DbgRecord "blah" will be seen twice, once // attached to %foo, then when %foo his hoisted it will "fall down" onto the // function call: - // DPValue blah - // DPValue xyzzy + // DbgRecord blah + // DbgRecord xyzzy // %bar = call i32 @foobar() // causing it to appear attached to the call too. // // To avoid this, cloneDebugInfoFrom takes an optional "start cloning from - // here" position to account for this behaviour. We point it at any DPValues - // on the next instruction, here labelled xyzzy, before we hoist %foo. - // Later, we only only clone DPValues from that position (xyzzy) onwards, - // which avoids cloning DPValue "blah" multiple times. - // (Stored as a range because it gives us a natural way of testing whether - // there were DPValues on the next instruction before we hoisted things). - iterator_range<DPValue::self_iterator> NextDbgInsts = + // here" position to account for this behaviour. We point it at any + // DbgRecords on the next instruction, here labelled xyzzy, before we hoist + // %foo. Later, we only only clone DbgRecords from that position (xyzzy) + // onwards, which avoids cloning DbgRecord "blah" multiple times. (Stored as + // a range because it gives us a natural way of testing whether + // there were DbgRecords on the next instruction before we hoisted things). + iterator_range<DbgRecord::self_iterator> NextDbgInsts = (I != E) ? 
I->getDbgRecordRange() : DPMarker::getEmptyDbgRecordRange(); while (I != E) { @@ -777,7 +777,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // OrigPreHeader's old terminator (the original branch into the loop), and // remove the corresponding incoming values from the PHI nodes in OrigHeader. LoopEntryBranch->eraseFromParent(); - OrigPreheader->flushTerminatorDbgValues(); + OrigPreheader->flushTerminatorDbgRecords(); // Update MemorySSA before the rewrite call below changes the 1:1 // instruction:cloned_instruction_or_value mapping. diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 0f3d140..6d2a6a3 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1572,7 +1572,8 @@ hoistLockstepIdenticalDPValues(Instruction *TI, Instruction *I1, while (none_of(Itrs, atEnd)) { bool HoistDPVs = allIdentical(Itrs); for (CurrentAndEndIt &Pair : Itrs) { - // Increment Current iterator now as we may be about to move the DPValue. + // Increment Current iterator now as we may be about to move the + // DbgRecord. DbgRecord &DR = *Pair.first++; if (HoistDPVs) { DR.removeFromParent(); @@ -5304,7 +5305,7 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) { // Ensure that any debug-info records that used to occur after the Unreachable // are moved to in front of it -- otherwise they'll "dangle" at the end of // the block. - BB->flushTerminatorDbgValues(); + BB->flushTerminatorDbgRecords(); // Debug-info records on the unreachable inst itself should be deleted, as // below we delete everything past the final executable instruction. @@ -5326,8 +5327,8 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) { // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn, // and we can therefore guarantee this block will be erased. 
- // If we're deleting this, we're deleting any subsequent dbg.values, so - // delete DPValue records of variable information. + // If we're deleting this, we're deleting any subsequent debug info, so + // delete DbgRecords. BBI->dropDbgRecords(); // Delete this instruction (any uses are guaranteed to be dead) diff --git a/llvm/lib/Transforms/Utils/ValueMapper.cpp b/llvm/lib/Transforms/Utils/ValueMapper.cpp index 3da1610..abb7a44 100644 --- a/llvm/lib/Transforms/Utils/ValueMapper.cpp +++ b/llvm/lib/Transforms/Utils/ValueMapper.cpp @@ -146,7 +146,7 @@ public: Value *mapValue(const Value *V); void remapInstruction(Instruction *I); void remapFunction(Function &F); - void remapDPValue(DbgRecord &DPV); + void remapDbgRecord(DbgRecord &DPV); Constant *mapConstant(const Constant *C) { return cast_or_null<Constant>(mapValue(C)); @@ -537,7 +537,7 @@ Value *Mapper::mapValue(const Value *V) { return getVM()[V] = ConstantPointerNull::get(cast<PointerType>(NewTy)); } -void Mapper::remapDPValue(DbgRecord &DR) { +void Mapper::remapDbgRecord(DbgRecord &DR) { if (DPLabel *DPL = dyn_cast<DPLabel>(&DR)) { DPL->setLabel(cast<DILabel>(mapMetadata(DPL->getLabel()))); return; @@ -1067,7 +1067,7 @@ void Mapper::remapFunction(Function &F) { for (Instruction &I : BB) { remapInstruction(&I); for (DbgRecord &DR : I.getDbgRecordRange()) - remapDPValue(DR); + remapDbgRecord(DR); } } } @@ -1234,7 +1234,7 @@ void ValueMapper::remapInstruction(Instruction &I) { } void ValueMapper::remapDPValue(Module *M, DPValue &V) { - FlushingMapper(pImpl)->remapDPValue(V); + FlushingMapper(pImpl)->remapDbgRecord(V); } void ValueMapper::remapDPValueRange( diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index a7ebf78..e86705e 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -79,6 +79,13 @@ public: VPBasicBlock *getInsertBlock() const { return BB; } 
VPBasicBlock::iterator getInsertPoint() const { return InsertPt; } + /// Create a VPBuilder to insert after \p R. + static VPBuilder getToInsertAfter(VPRecipeBase *R) { + VPBuilder B; + B.setInsertPoint(R->getParent(), std::next(R->getIterator())); + return B; + } + /// InsertPoint - A saved insertion point. class VPInsertPoint { VPBasicBlock *Block = nullptr; @@ -131,8 +138,9 @@ public: /// Create an N-ary operation with \p Opcode, \p Operands and set \p Inst as /// its underlying Instruction. - VPValue *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands, - Instruction *Inst = nullptr, const Twine &Name = "") { + VPInstruction *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands, + Instruction *Inst = nullptr, + const Twine &Name = "") { DebugLoc DL; if (Inst) DL = Inst->getDebugLoc(); @@ -140,8 +148,8 @@ public: NewVPInst->setUnderlyingValue(Inst); return NewVPInst; } - VPValue *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands, - DebugLoc DL, const Twine &Name = "") { + VPInstruction *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands, + DebugLoc DL, const Twine &Name = "") { return createInstruction(Opcode, Operands, DL, Name); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp index 6474a96..877b5d4 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp @@ -296,8 +296,7 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB, // recipes. if (Br->isConditional()) { VPValue *Cond = getOrCreateVPOperand(Br->getCondition()); - VPBB->appendRecipe( - new VPInstruction(VPInstruction::BranchOnCond, {Cond})); + VPIRBuilder.createNaryOp(VPInstruction::BranchOnCond, {Cond}, Inst); } // Skip the rest of the Instruction processing for Branch instructions. 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index f6b564a..3b19db9 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1192,10 +1192,10 @@ void VPlanTransforms::addActiveLaneMask( LaneMask = addVPLaneMaskPhiAndUpdateExitBranch( Plan, DataAndControlFlowWithoutRuntimeCheck); } else { - LaneMask = new VPInstruction(VPInstruction::ActiveLaneMask, - {WideCanonicalIV, Plan.getTripCount()}, - nullptr, "active.lane.mask"); - LaneMask->insertAfter(WideCanonicalIV); + VPBuilder B = VPBuilder::getToInsertAfter(WideCanonicalIV); + LaneMask = B.createNaryOp(VPInstruction::ActiveLaneMask, + {WideCanonicalIV, Plan.getTripCount()}, nullptr, + "active.lane.mask"); } // Walk users of WideCanonicalIV and replace all compares of the form |