Diffstat (limited to 'llvm/lib/CodeGen')
34 files changed, 305 insertions, 239 deletions
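Most of the mechanical churn below is TargetInstrInfo::getRegClass losing its TargetRegisterInfo parameter: TargetInstrInfo now reaches the register info through its own TRI member, as the TargetInstrInfo.cpp hunks show. A minimal before/after sketch of the call-site pattern (illustrative only; TII and TRI are the usual handles obtained from the subtarget):

  // Before: callers threaded a TargetRegisterInfo pointer through every query.
  const TargetRegisterClass *RC = TII->getRegClass(MI.getDesc(), OpIdx, TRI);
  // After: the TRI argument is gone; TII consults its own TRI reference.
  const TargetRegisterClass *RC = TII->getRegClass(MI.getDesc(), OpIdx);

The same parameter drop applies to storeRegToStackSlot, loadRegFromStackSlot, and reMaterialize throughout the files below.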
diff --git a/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 6567bd4..46b5bb7 100644 --- a/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -395,7 +395,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction( // Note register reference... const TargetRegisterClass *RC = nullptr; if (i < MI.getDesc().getNumOperands()) - RC = TII->getRegClass(MI.getDesc(), i, TRI); + RC = TII->getRegClass(MI.getDesc(), i); AggressiveAntiDepState::RegisterReference RR = { &MO, RC }; RegRefs.emplace(Reg.asMCReg(), RR); } @@ -479,7 +479,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI, // Note register reference... const TargetRegisterClass *RC = nullptr; if (i < MI.getDesc().getNumOperands()) - RC = TII->getRegClass(MI.getDesc(), i, TRI); + RC = TII->getRegClass(MI.getDesc(), i); AggressiveAntiDepState::RegisterReference RR = { &MO, RC }; RegRefs.emplace(Reg.asMCReg(), RR); } diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp index e317e1c..52e2909 100644 --- a/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -183,8 +183,7 @@ updateBranches(MachineFunction &MF, // clusters are ordered in increasing order of their IDs, with the "Exception" // and "Cold" succeeding all other clusters. // FuncClusterInfo represents the cluster information for basic blocks. It -// maps from BBID of basic blocks to their cluster information. If this is -// empty, it means unique sections for all basic blocks in the function. +// maps from BBID of basic blocks to their cluster information. static void assignSections(MachineFunction &MF, const DenseMap<UniqueBBID, BBClusterInfo> &FuncClusterInfo) { @@ -197,10 +196,8 @@ assignSections(MachineFunction &MF, for (auto &MBB : MF) { // With the 'all' option, every basic block is placed in a unique section. // With the 'list' option, every basic block is placed in a section - // associated with its cluster, unless we want individual unique sections - // for every basic block in this function (if FuncClusterInfo is empty). - if (MF.getTarget().getBBSectionsType() == llvm::BasicBlockSection::All || - FuncClusterInfo.empty()) { + // associated with its cluster. + if (MF.getTarget().getBBSectionsType() == llvm::BasicBlockSection::All) { // If unique sections are desired for all basic blocks of the function, we // set every basic block's section ID equal to its original position in // the layout (which is equal to its number). This ensures that basic @@ -308,22 +305,22 @@ bool BasicBlockSections::handleBBSections(MachineFunction &MF) { if (BBSectionsType == BasicBlockSection::List && hasInstrProfHashMismatch(MF)) return false; - // Renumber blocks before sorting them. This is useful for accessing the - // original layout positions and finding the original fallthroughs. - MF.RenumberBlocks(); DenseMap<UniqueBBID, BBClusterInfo> FuncClusterInfo; if (BBSectionsType == BasicBlockSection::List) { - auto [HasProfile, ClusterInfo] = - getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>() - .getClusterInfoForFunction(MF.getName()); - if (!HasProfile) + auto ClusterInfo = getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>() + .getClusterInfoForFunction(MF.getName()); + if (ClusterInfo.empty()) return false; for (auto &BBClusterInfo : ClusterInfo) { FuncClusterInfo.try_emplace(BBClusterInfo.BBID, BBClusterInfo); } } + // Renumber blocks before sorting them. 
This is useful for accessing the + // original layout positions and finding the original fallthroughs. + MF.RenumberBlocks(); + + MF.setBBSectionsType(BBSectionsType); assignSections(MF, FuncClusterInfo); diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index 485b44ae..c234c0f 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -58,22 +58,24 @@ BasicBlockSectionsProfileReader::parseUniqueBBID(StringRef S) const { } bool BasicBlockSectionsProfileReader::isFunctionHot(StringRef FuncName) const { - return getClusterInfoForFunction(FuncName).first; + return !getClusterInfoForFunction(FuncName).empty(); } -std::pair<bool, SmallVector<BBClusterInfo>> +SmallVector<BBClusterInfo> BasicBlockSectionsProfileReader::getClusterInfoForFunction( StringRef FuncName) const { auto R = ProgramPathAndClusterInfo.find(getAliasName(FuncName)); - return R != ProgramPathAndClusterInfo.end() - ? std::pair(true, R->second.ClusterInfo) - : std::pair(false, SmallVector<BBClusterInfo>()); + return R != ProgramPathAndClusterInfo.end() ? R->second.ClusterInfo : SmallVector<BBClusterInfo>(); } SmallVector<SmallVector<unsigned>> BasicBlockSectionsProfileReader::getClonePathsForFunction( StringRef FuncName) const { - return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).ClonePaths; + auto R = ProgramPathAndClusterInfo.find(getAliasName(FuncName)); + return R != ProgramPathAndClusterInfo.end() + ? R->second.ClonePaths + : SmallVector<SmallVector<unsigned>>(); } uint64_t BasicBlockSectionsProfileReader::getEdgeCount( @@ -494,7 +496,7 @@ bool BasicBlockSectionsProfileReaderWrapperPass::isFunctionHot( return BBSPR.isFunctionHot(FuncName); } -std::pair<bool, SmallVector<BBClusterInfo>> +SmallVector<BBClusterInfo> BasicBlockSectionsProfileReaderWrapperPass::getClusterInfoForFunction( StringRef FuncName) const { return BBSPR.getClusterInfoForFunction(FuncName); diff --git a/llvm/lib/CodeGen/BreakFalseDeps.cpp b/llvm/lib/CodeGen/BreakFalseDeps.cpp index 1846880..fead3ee 100644 --- a/llvm/lib/CodeGen/BreakFalseDeps.cpp +++ b/llvm/lib/CodeGen/BreakFalseDeps.cpp @@ -133,7 +133,7 @@ bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, } // Get the undef operand's register class - const TargetRegisterClass *OpRC = TII->getRegClass(MI->getDesc(), OpIdx, TRI); + const TargetRegisterClass *OpRC = TII->getRegClass(MI->getDesc(), OpIdx); assert(OpRC && "Not a valid register class"); // If the instruction has a true dependency, we can hide the false dependency diff --git a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp index 86377cf..3259a3e 100644 --- a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -187,7 +187,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) { const TargetRegisterClass *NewRC = nullptr; if (i < MI.getDesc().getNumOperands()) - NewRC = TII->getRegClass(MI.getDesc(), i, TRI); + NewRC = TII->getRegClass(MI.getDesc(), i); // For now, only allow the register to be changed if its register // class is consistent across all uses. 
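The profile-reader hunks above change the lookup contract: instead of a std::pair<bool, SmallVector<BBClusterInfo>> whose bool says whether a profile exists, getClusterInfoForFunction now returns a plain SmallVector, and an empty vector means "no profile". A condensed sketch of the caller side in handleBBSections (names as in the BasicBlockSections.cpp hunk above):

  SmallVector<BBClusterInfo> ClusterInfo =
      getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
          .getClusterInfoForFunction(MF.getName());
  if (ClusterInfo.empty())
    return false; // emptiness doubles as the "function not profiled" signal

Note the knock-on effect in assignSections above: an empty FuncClusterInfo no longer requests unique sections for every block, since empty now means unprofiled and the pass bails out earlier.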
@@ -316,7 +316,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) { const TargetRegisterClass *NewRC = nullptr; if (i < MI.getDesc().getNumOperands()) - NewRC = TII->getRegClass(MI.getDesc(), i, TRI); + NewRC = TII->getRegClass(MI.getDesc(), i); // For now, only allow the register to be changed if its register // class is consistent across all uses. diff --git a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp index 8b74dce..c23cac7 100644 --- a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp +++ b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp @@ -420,7 +420,7 @@ public: LLVM_DEBUG(dbgs() << "Insert spill before " << *InsertBefore); TII.storeRegToStackSlot(*MI.getParent(), InsertBefore, Reg, IsKill, FI, - RC, &TRI, Register()); + RC, Register()); } } @@ -429,7 +429,7 @@ public: const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg); int FI = RegToSlotIdx[Reg]; if (It != MBB->end()) { - TII.loadRegFromStackSlot(*MBB, It, Reg, FI, RC, &TRI, Register()); + TII.loadRegFromStackSlot(*MBB, It, Reg, FI, RC, Register()); return; } @@ -437,7 +437,7 @@ public: // and then swap them. assert(!MBB->empty() && "Empty block"); --It; - TII.loadRegFromStackSlot(*MBB, It, Reg, FI, RC, &TRI, Register()); + TII.loadRegFromStackSlot(*MBB, It, Reg, FI, RC, Register()); MachineInstr *Reload = It->getPrevNode(); int Dummy = 0; (void)Dummy; diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 5fab6ec..e8954a3 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -114,7 +114,7 @@ Register llvm::constrainOperandRegClass( // Assume physical registers are properly constrained. assert(Reg.isVirtual() && "PhysReg not implemented"); - const TargetRegisterClass *OpRC = TII.getRegClass(II, OpIdx, &TRI); + const TargetRegisterClass *OpRC = TII.getRegClass(II, OpIdx); // Some of the target independent instructions, like COPY, may not impose any // register class constraints on some of their operands: If it's a use, we can // skip constraining as the instruction defining the register would constrain diff --git a/llvm/lib/CodeGen/InitUndef.cpp b/llvm/lib/CodeGen/InitUndef.cpp index e07e598..12b36f5 100644 --- a/llvm/lib/CodeGen/InitUndef.cpp +++ b/llvm/lib/CodeGen/InitUndef.cpp @@ -232,7 +232,7 @@ bool InitUndef::processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB, MachineOperand &UseMO = MI.getOperand(UseOpIdx); if (UseMO.getReg() == MCRegister::NoRegister) { const TargetRegisterClass *RC = - TII->getRegClass(MI.getDesc(), UseOpIdx, TRI); + TII->getRegClass(MI.getDesc(), UseOpIdx); Register NewDest = MRI->createVirtualRegister(RC); // We don't have a way to update dead lanes, so keep track of the // new register so that we avoid querying it later. diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp index c3e0964..6837030 100644 --- a/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/llvm/lib/CodeGen/InlineSpiller.cpp @@ -473,7 +473,7 @@ bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI, MachineInstrSpan MIS(MII, MBB); // Insert spill without kill flag immediately after def. 
TII.storeRegToStackSlot(*MBB, MII, SrcReg, false, StackSlot, - MRI.getRegClass(SrcReg), &TRI, Register()); + MRI.getRegClass(SrcReg), Register()); LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MII); for (const MachineInstr &MI : make_range(MIS.begin(), MII)) getVDefInterval(MI, LIS); @@ -1119,7 +1119,7 @@ void InlineSpiller::insertReload(Register NewVReg, MachineInstrSpan MIS(MI, &MBB); TII.loadRegFromStackSlot(MBB, MI, NewVReg, StackSlot, - MRI.getRegClass(NewVReg), &TRI, Register()); + MRI.getRegClass(NewVReg), Register()); LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MI); @@ -1155,7 +1155,7 @@ void InlineSpiller::insertSpill(Register NewVReg, bool isKill, if (IsRealSpill) TII.storeRegToStackSlot(MBB, SpillBefore, NewVReg, isKill, StackSlot, - MRI.getRegClass(NewVReg), &TRI, Register()); + MRI.getRegClass(NewVReg), Register()); else // Don't spill undef value. // Anything works for undef, in particular keeping the memory @@ -1729,7 +1729,7 @@ void HoistSpillHelper::hoistAllSpills() { MachineBasicBlock::iterator MII = IPA.getLastInsertPointIter(OrigLI, *BB); MachineInstrSpan MIS(MII, BB); TII.storeRegToStackSlot(*BB, MII, LiveReg, false, Slot, - MRI.getRegClass(LiveReg), &TRI, Register()); + MRI.getRegClass(LiveReg), Register()); LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MII); for (const MachineInstr &MI : make_range(MIS.begin(), MII)) getVDefInterval(MI, LIS); diff --git a/llvm/lib/CodeGen/LiveRangeEdit.cpp b/llvm/lib/CodeGen/LiveRangeEdit.cpp index 5b0365d..6fe1170 100644 --- a/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -88,7 +88,7 @@ SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB, bool Late, unsigned SubIdx, MachineInstr *ReplaceIndexMI) { assert(RM.OrigMI && "Invalid remat"); - TII.reMaterialize(MBB, MI, DestReg, SubIdx, *RM.OrigMI, tri); + TII.reMaterialize(MBB, MI, DestReg, SubIdx, *RM.OrigMI); // DestReg of the cloned instruction cannot be Dead. Set isDead of DestReg // to false anyway in case the isDead flag of RM.OrigMI's dest register // is true. diff --git a/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp b/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp index a72c2c4..32b6c46 100644 --- a/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp +++ b/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp @@ -83,13 +83,6 @@ static cl::opt<std::string> ModelUnderTraining( "regalloc-model", cl::Hidden, cl::desc("The model being trained for register allocation eviction")); -static cl::opt<bool> EnableDevelopmentFeatures( - "regalloc-enable-development-features", cl::Hidden, - cl::desc("Whether or not to enable features under development for the ML " - "regalloc advisor")); - -#else -static const bool EnableDevelopmentFeatures = false; #endif // #ifdef LLVM_HAVE_TFLITE /// The score injection pass. 
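The MLRegAllocEvictAdvisor hunks that follow delete the regalloc-enable-development-features flag and every code path keyed on it, so the advisor's feature set is always exactly RA_EVICT_FEATURES_LIST. For readers unfamiliar with the idiom, the feature tables are X-macro lists; a self-contained illustration of the pattern (hypothetical FEATURES list, not the real one):

  #define FEATURES(M)                                                        \
    M(int64_t, mask, PerLiveRangeShape, "boolean: candidate for eviction")   \
    M(float, progress, {1}, "ratio of current queue size to initial size")

  enum FeatureIDs {
  #define IDX(TYPE, NAME, SHAPE, DESC) NAME,
    FEATURES(IDX)
  #undef IDX
    FeatureCount
  };

Each consumer (the enum indices, the TensorSpec declarations, resetInputs' memset loop) re-expands the same list with a different M, which is why removing the development features below is mostly a matter of deleting macro splices.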
@@ -212,23 +205,6 @@ static const std::vector<int64_t> PerLiveRangeShape{1, NumberOfInterferences}; "lowest stage of an interval in this LR") \ M(float, progress, {1}, "ratio of current queue size to initial size") -#ifdef LLVM_HAVE_TFLITE -#define RA_EVICT_FIRST_DEVELOPMENT_FEATURE(M) \ - M(int64_t, instructions, InstructionsShape, \ - "Opcodes of the instructions covered by the eviction problem") - -#define RA_EVICT_REST_DEVELOPMENT_FEATURES(M) \ - M(int64_t, instructions_mapping, InstructionsMappingShape, \ - "A binary matrix mapping LRs to instruction opcodes") \ - M(float, mbb_frequencies, MBBFrequencyShape, \ - "A vector of machine basic block frequencies") \ - M(int64_t, mbb_mapping, InstructionsShape, \ - "A vector of indices mapping instructions to MBBs") -#else -#define RA_EVICT_FIRST_DEVELOPMENT_FEATURE(M) -#define RA_EVICT_REST_DEVELOPMENT_FEATURES(M) -#endif - // The model learns to pick one of the mask == 1 interferences. This is the // name of the output tensor. The contract with the model is that the output // will be guaranteed to be to a mask == 1 position. Using a macro here to @@ -242,12 +218,6 @@ enum FeatureIDs { #define _FEATURE_IDX_SIMPLE(_, name, __, ___) name #define _FEATURE_IDX(A, B, C, D) _FEATURE_IDX_SIMPLE(A, B, C, D), RA_EVICT_FEATURES_LIST(_FEATURE_IDX) FeatureCount, -#ifdef LLVM_HAVE_TFLITE - RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_FEATURE_IDX_SIMPLE) = FeatureCount, -#else - RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_FEATURE_IDX) -#endif // #ifdef LLVM_HAVE_TFLITE - RA_EVICT_REST_DEVELOPMENT_FEATURES(_FEATURE_IDX) FeaturesWithDevelopmentCount #undef _FEATURE_IDX #undef _FEATURE_IDX_SIMPLE }; @@ -268,11 +238,7 @@ void resetInputs(MLModelRunner &Runner) { std::memset(Runner.getTensorUntyped(FeatureIDs::NAME), 0, \ getTotalSize<TYPE>(SHAPE)); RA_EVICT_FEATURES_LIST(_RESET) - if (EnableDevelopmentFeatures) { - RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_RESET) - RA_EVICT_REST_DEVELOPMENT_FEATURES(_RESET) #undef _RESET - } } // Per-live interval components that get aggregated into the feature values @@ -398,13 +364,7 @@ class ReleaseModeEvictionAdvisorProvider final public: ReleaseModeEvictionAdvisorProvider(LLVMContext &Ctx) : RegAllocEvictionAdvisorProvider(AdvisorMode::Release, Ctx) { - if (EnableDevelopmentFeatures) { - InputFeatures = {RA_EVICT_FEATURES_LIST( - _DECL_FEATURES) RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_DECL_FEATURES) - RA_EVICT_REST_DEVELOPMENT_FEATURES(_DECL_FEATURES)}; - } else { - InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)}; - } + InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)}; } // support for isa<> and dyn_cast. 
static bool classof(const RegAllocEvictionAdvisorProvider *R) { @@ -500,25 +460,12 @@ class DevelopmentModeEvictionAdvisorProvider final public: DevelopmentModeEvictionAdvisorProvider(LLVMContext &Ctx) : RegAllocEvictionAdvisorProvider(AdvisorMode::Development, Ctx) { - if (EnableDevelopmentFeatures) { - InputFeatures = {RA_EVICT_FEATURES_LIST( - _DECL_FEATURES) RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_DECL_FEATURES) - RA_EVICT_REST_DEVELOPMENT_FEATURES(_DECL_FEATURES)}; - TrainingInputFeatures = { - RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES) - RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_DECL_TRAIN_FEATURES) - RA_EVICT_REST_DEVELOPMENT_FEATURES(_DECL_TRAIN_FEATURES) - TensorSpec::createSpec<float>("action_discount", {1}), - TensorSpec::createSpec<int32_t>("action_step_type", {1}), - TensorSpec::createSpec<float>("action_reward", {1})}; - } else { - InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)}; - TrainingInputFeatures = { - RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES) - TensorSpec::createSpec<float>("action_discount", {1}), - TensorSpec::createSpec<int32_t>("action_step_type", {1}), - TensorSpec::createSpec<float>("action_reward", {1})}; - } + InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)}; + TrainingInputFeatures = { + RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES) + TensorSpec::createSpec<float>("action_discount", {1}), + TensorSpec::createSpec<int32_t>("action_step_type", {1}), + TensorSpec::createSpec<float>("action_reward", {1})}; if (ModelUnderTraining.empty() && TrainingLog.empty()) { Ctx.emitError("Regalloc development mode should be requested with at " "least logging enabled and/or a training model"); @@ -814,34 +761,6 @@ MCRegister MLEvictAdvisor::tryFindEvictionCandidate( /*NumUrgent*/ 0.0, LRPosInfo); assert(InitialQSize > 0.0 && "We couldn't have gotten here if we had " "nothing to allocate initially."); -#ifdef LLVM_HAVE_TFLITE - if (EnableDevelopmentFeatures) { - extractInstructionFeatures( - LRPosInfo, Runner, - [this](SlotIndex InputIndex) -> int { - auto *CurrentMachineInstruction = - LIS->getInstructionFromIndex(InputIndex); - if (!CurrentMachineInstruction) { - return -1; - } - return CurrentMachineInstruction->getOpcode(); - }, - [this](SlotIndex InputIndex) -> float { - auto *CurrentMachineInstruction = - LIS->getInstructionFromIndex(InputIndex); - return MBFI.getBlockFreqRelativeToEntryBlock( - CurrentMachineInstruction->getParent()); - }, - [this](SlotIndex InputIndex) -> MachineBasicBlock * { - auto *CurrentMachineInstruction = - LIS->getInstructionFromIndex(InputIndex); - return CurrentMachineInstruction->getParent(); - }, - FeatureIDs::instructions, FeatureIDs::instructions_mapping, - FeatureIDs::mbb_frequencies, FeatureIDs::mbb_mapping, - LIS->getSlotIndexes()->getLastIndex()); - } -#endif // #ifdef LLVM_HAVE_TFLITE // Normalize the features. for (auto &V : Largest) V = V ? V : 1.0; @@ -987,13 +906,6 @@ void MLEvictAdvisor::extractFeatures( HintWeights += LIFC.HintWeights; NumRematerializable += LIFC.IsRemat; - - if (EnableDevelopmentFeatures) { - for (auto CurrentSegment : LI) { - LRPosInfo.push_back( - LRStartEndInfo{CurrentSegment.start, CurrentSegment.end, Pos}); - } - } } size_t Size = 0; if (!Intervals.empty()) { @@ -1209,9 +1121,7 @@ int64_t DevelopmentModeEvictAdvisor::tryFindEvictionCandidatePosition( Log->startObservation(); size_t CurrentFeature = 0; - size_t FeatureCount = EnableDevelopmentFeatures - ? 
FeatureIDs::FeaturesWithDevelopmentCount - : FeatureIDs::FeatureCount; + size_t FeatureCount = FeatureIDs::FeatureCount; for (; CurrentFeature < FeatureCount; ++CurrentFeature) { Log->logTensorValue(CurrentFeature, reinterpret_cast<const char *>( diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index 37e5c51..eb46124 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -978,7 +978,7 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx, assert(getMF() && "Can't have an MF reference here!"); // Most opcodes have fixed constraints in their MCInstrDesc. if (!isInlineAsm()) - return TII->getRegClass(getDesc(), OpIdx, TRI); + return TII->getRegClass(getDesc(), OpIdx); if (!getOperand(OpIdx).isReg()) return nullptr; diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp index 729e73c..c169467 100644 --- a/llvm/lib/CodeGen/MachineLICM.cpp +++ b/llvm/lib/CodeGen/MachineLICM.cpp @@ -1399,7 +1399,7 @@ MachineInstr *MachineLICMImpl::ExtractHoistableLoad(MachineInstr *MI, if (NewOpc == 0) return nullptr; const MCInstrDesc &MID = TII->get(NewOpc); MachineFunction &MF = *MI->getMF(); - const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI); + const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex); // Ok, we're unfolding. Create a temporary register and do the unfold. Register Reg = MRI->createVirtualRegister(RC); diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index 94ed82e..0ceeda4 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -569,7 +569,7 @@ bool MachineSinking::PerformSinkAndFold(MachineInstr &MI, // Sink a copy of the instruction, replacing a COPY instruction. MachineBasicBlock::iterator InsertPt = SinkDst->getIterator(); Register DstReg = SinkDst->getOperand(0).getReg(); - TII->reMaterialize(*SinkDst->getParent(), InsertPt, DstReg, 0, MI, *TRI); + TII->reMaterialize(*SinkDst->getParent(), InsertPt, DstReg, 0, MI); New = &*std::prev(InsertPt); if (!New->getDebugLoc()) New->setDebugLoc(SinkDst->getDebugLoc()); diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index fdf1048..013f529 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -2657,8 +2657,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { return; } if (MONum < MCID.getNumOperands()) { - if (const TargetRegisterClass *DRC = - TII->getRegClass(MCID, MONum, TRI)) { + if (const TargetRegisterClass *DRC = TII->getRegClass(MCID, MONum)) { if (!DRC->contains(Reg)) { report("Illegal physical register for instruction", MO, MONum); OS << printReg(Reg, TRI) << " is not a " @@ -2742,12 +2741,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { // has register class constraint, the virtual register must // comply to it. 
if (!isPreISelGenericOpcode(MCID.getOpcode()) && - MONum < MCID.getNumOperands() && - TII->getRegClass(MCID, MONum, TRI)) { + MONum < MCID.getNumOperands() && TII->getRegClass(MCID, MONum)) { report("Virtual register does not match instruction constraint", MO, MONum); OS << "Expect register class " - << TRI->getRegClassName(TII->getRegClass(MCID, MONum, TRI)) + << TRI->getRegClassName(TII->getRegClass(MCID, MONum)) << " but got nothing\n"; return; } @@ -2773,8 +2771,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } } if (MONum < MCID.getNumOperands()) { - if (const TargetRegisterClass *DRC = - TII->getRegClass(MCID, MONum, TRI)) { + if (const TargetRegisterClass *DRC = TII->getRegClass(MCID, MONum)) { if (SubIdx) { const TargetRegisterClass *SuperRC = TRI->getLargestLegalSuperClass(RC, *MF); diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp index ec6ffd4..9097728 100644 --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -594,8 +594,7 @@ void RegAllocFastImpl::spill(MachineBasicBlock::iterator Before, LLVM_DEBUG(dbgs() << " to stack slot #" << FI << '\n'); const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); - TII->storeRegToStackSlot(*MBB, Before, AssignedReg, Kill, FI, &RC, TRI, - VirtReg); + TII->storeRegToStackSlot(*MBB, Before, AssignedReg, Kill, FI, &RC, VirtReg); ++NumStores; MachineBasicBlock::iterator FirstTerm = MBB->getFirstTerminator(); @@ -652,7 +651,7 @@ void RegAllocFastImpl::reload(MachineBasicBlock::iterator Before, << printReg(PhysReg, TRI) << '\n'); int FI = getStackSpaceFor(VirtReg); const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); - TII->loadRegFromStackSlot(*MBB, Before, PhysReg, FI, &RC, TRI, VirtReg); + TII->loadRegFromStackSlot(*MBB, Before, PhysReg, FI, &RC, VirtReg); ++NumLoads; } @@ -1123,7 +1122,7 @@ bool RegAllocFastImpl::defineVirtReg(MachineInstr &MI, unsigned OpNum, if (MO.isMBB()) { MachineBasicBlock *Succ = MO.getMBB(); TII->storeRegToStackSlot(*Succ, Succ->begin(), PhysReg, Kill, FI, - &RC, TRI, VirtReg); + &RC, VirtReg); ++NumStores; Succ->addLiveIn(PhysReg); } diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index 99f7693..005e44f 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -79,9 +79,9 @@ static cl::opt<bool> EnableJoining("join-liveintervals", cl::desc("Coalesce copies (default=true)"), cl::init(true), cl::Hidden); -static cl::opt<bool> UseTerminalRule("terminal-rule", - cl::desc("Apply the terminal rule"), - cl::init(false), cl::Hidden); +static cl::opt<cl::boolOrDefault> + EnableTerminalRule("terminal-rule", cl::desc("Apply the terminal rule"), + cl::init(cl::BOU_UNSET), cl::Hidden); /// Temporary flag to test critical edge unsplitting. static cl::opt<bool> EnableJoinSplits( @@ -134,6 +134,7 @@ class RegisterCoalescer : private LiveRangeEdit::Delegate { SlotIndexes *SI = nullptr; const MachineLoopInfo *Loops = nullptr; RegisterClassInfo RegClassInfo; + bool UseTerminalRule = false; /// Position and VReg of a PHI instruction during coalescing. 
struct PHIValPos { @@ -1373,7 +1374,7 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP, } const unsigned DefSubIdx = DefMI->getOperand(0).getSubReg(); - const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI); + const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0); if (!DefMI->isImplicitDef()) { if (DstReg.isPhysical()) { Register NewDstReg = DstReg; @@ -4320,6 +4321,11 @@ bool RegisterCoalescer::run(MachineFunction &fn) { else JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE); + if (EnableTerminalRule == cl::BOU_UNSET) + UseTerminalRule = STI.enableTerminalRule(); + else + UseTerminalRule = EnableTerminalRule == cl::BOU_TRUE; + // If there are PHIs tracked by debug-info, they will need updating during // coalescing. Build an index of those PHIs to ease updating. SlotIndexes *Slots = LIS->getSlotIndexes(); diff --git a/llvm/lib/CodeGen/RegisterScavenging.cpp b/llvm/lib/CodeGen/RegisterScavenging.cpp index 7e26c2e..d886167 100644 --- a/llvm/lib/CodeGen/RegisterScavenging.cpp +++ b/llvm/lib/CodeGen/RegisterScavenging.cpp @@ -276,14 +276,14 @@ RegScavenger::spill(Register Reg, const TargetRegisterClass &RC, int SPAdj, ": Cannot scavenge register without an emergency " "spill slot!"); } - TII->storeRegToStackSlot(*MBB, Before, Reg, true, FI, &RC, TRI, Register()); + TII->storeRegToStackSlot(*MBB, Before, Reg, true, FI, &RC, Register()); MachineBasicBlock::iterator II = std::prev(Before); unsigned FIOperandNum = getFrameIndexOperandNum(*II); TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this); // Restore the scavenged register before its use (or first terminator). - TII->loadRegFromStackSlot(*MBB, UseMI, Reg, FI, &RC, TRI, Register()); + TII->loadRegFromStackSlot(*MBB, UseMI, Reg, FI, &RC, Register()); II = std::prev(UseMI); FIOperandNum = getFrameIndexOperandNum(*II); diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index f144f17..df353c4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -10988,6 +10988,22 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { } } + // fold (sra (xor (sra x, c1), -1), c2) -> (xor (sra x, c3), -1) + // This allows merging two arithmetic shifts even when there's a NOT in + // between. + SDValue X; + APInt C1; + if (N1C && sd_match(N0, m_OneUse(m_Not( + m_OneUse(m_Sra(m_Value(X), m_ConstInt(C1))))))) { + APInt C2 = N1C->getAPIntValue(); + zeroExtendToMatch(C1, C2, 1 /* Overflow Bit */); + APInt Sum = C1 + C2; + unsigned ShiftSum = Sum.getLimitedValue(OpSizeInBits - 1); + SDValue NewShift = DAG.getNode( + ISD::SRA, DL, VT, X, DAG.getShiftAmountConstant(ShiftSum, VT, DL)); + return DAG.getNOT(DL, NewShift, VT); + } + // fold (sra (shl X, m), (sub result_size, n)) // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for // result_size - n != m. @@ -18863,6 +18879,26 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); + if (VT != N1.getValueType()) + return SDValue(); + + // If this is equivalent to a disjoint or, replace it with one. This can + // happen if the sign operand is a sign mask (i.e., x << sign_bit_position). + if (DAG.SignBitIsZeroFP(N0) && + DAG.computeKnownBits(N1).Zero.isMaxSignedValue()) { + // TODO: Just directly match the shift pattern. computeKnownBits is heavy + // for such a narrowly targeted case. 
+ EVT IntVT = VT.changeTypeToInteger(); + // TODO: It appears to be profitable in some situations to unconditionally + // emit a fabs(n0) to perform this combine. + SDValue CastSrc0 = DAG.getNode(ISD::BITCAST, DL, IntVT, N0); + SDValue CastSrc1 = DAG.getNode(ISD::BITCAST, DL, IntVT, N1); + + SDValue SignOr = DAG.getNode(ISD::OR, DL, IntVT, CastSrc0, CastSrc1, + SDNodeFlags::Disjoint); + return DAG.getNode(ISD::BITCAST, DL, VT, SignOr); + } + return SDValue(); } diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 507b2d6..5c84059 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1965,7 +1965,7 @@ Register FastISel::createResultReg(const TargetRegisterClass *RC) { Register FastISel::constrainOperandRegClass(const MCInstrDesc &II, Register Op, unsigned OpNum) { if (Op.isVirtual()) { - const TargetRegisterClass *RegClass = TII.getRegClass(II, OpNum, &TRI); + const TargetRegisterClass *RegClass = TII.getRegClass(II, OpNum); if (!MRI.constrainRegClass(Op, RegClass)) { // If it's not legal to COPY between the register classes, something // has gone very wrong before we got here. diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index d84c3fb..72d0c44 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -125,7 +125,7 @@ void InstrEmitter::EmitCopyFromReg(SDValue Op, bool IsClone, Register SrcReg, const TargetRegisterClass *RC = nullptr; if (i + II.getNumDefs() < II.getNumOperands()) { RC = TRI->getAllocatableClass( - TII->getRegClass(II, i + II.getNumDefs(), TRI)); + TII->getRegClass(II, i + II.getNumDefs())); } if (!UseRC) UseRC = RC; @@ -197,7 +197,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, // register instead of creating a new vreg. Register VRBase; const TargetRegisterClass *RC = - TRI->getAllocatableClass(TII->getRegClass(II, i, TRI)); + TRI->getAllocatableClass(TII->getRegClass(II, i)); // Always let the value type influence the used register class. The // constraints on the instruction may be too lax to represent the value // type correctly. For example, a 64-bit float (X86::FR64) can't live in @@ -330,7 +330,7 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, if (II) { const TargetRegisterClass *OpRC = nullptr; if (IIOpNum < II->getNumOperands()) - OpRC = TII->getRegClass(*II, IIOpNum, TRI); + OpRC = TII->getRegClass(*II, IIOpNum); if (OpRC) { unsigned MinNumRegs = MinRCSize; @@ -409,8 +409,7 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, SDValue Op, Register VReg = R->getReg(); MVT OpVT = Op.getSimpleValueType(); const TargetRegisterClass *IIRC = - II ? TRI->getAllocatableClass(TII->getRegClass(*II, IIOpNum, TRI)) - : nullptr; + II ? TRI->getAllocatableClass(TII->getRegClass(*II, IIOpNum)) : nullptr; const TargetRegisterClass *OpRC = TLI->isTypeLegal(OpVT) ? TLI->getRegClassFor(OpVT, diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 316aacd..a0baf82 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -4842,9 +4842,15 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::Libcall LC = Node->getOpcode() == ISD::FSINCOS ? 
RTLIB::getSINCOS(VT) : RTLIB::getSINCOSPI(VT); - bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results); - if (!Expanded) - llvm_unreachable("Expected scalar FSINCOS[PI] to expand to libcall!"); + bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT); + if (!Expanded) { + DAG.getContext()->emitError(Twine("no libcall available for ") + + Node->getOperationName(&DAG)); + SDValue Poison = DAG.getPOISON(VT); + Results.push_back(Poison); + Results.push_back(Poison); + } + break; } case ISD::FLOG: @@ -4934,7 +4940,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { EVT VT = Node->getValueType(0); RTLIB::Libcall LC = Node->getOpcode() == ISD::FMODF ? RTLIB::getMODF(VT) : RTLIB::getFREXP(VT); - bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results, + bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT, /*CallRetResNo=*/0); if (!Expanded) llvm_unreachable("Expected scalar FFREXP/FMODF to expand to libcall!"); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 58983cb..29c4dac 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -1726,7 +1726,8 @@ void DAGTypeLegalizer::ExpandFloatRes_UnaryWithTwoFPResults( SDNode *N, RTLIB::Libcall LC, std::optional<unsigned> CallRetResNo) { assert(!N->isStrictFPOpcode() && "strictfp not implemented"); SmallVector<SDValue> Results; - DAG.expandMultipleResultFPLibCall(LC, N, Results, CallRetResNo); + DAG.expandMultipleResultFPLibCall(LC, N, Results, N->getValueType(0), + CallRetResNo); for (auto [ResNo, Res] : enumerate(Results)) { SDValue Lo, Hi; GetPairElements(Res, Lo, Hi); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 94751be5..f5a54497 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -1268,20 +1268,30 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { return; break; - case ISD::FSINCOS: + case ISD::FSINCOSPI: { + EVT VT = Node->getValueType(0); + RTLIB::Libcall LC = RTLIB::getSINCOSPI(VT); + if (LC != RTLIB::UNKNOWN_LIBCALL && + DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT)) + return; + + // TODO: Try to see if there's a narrower call available to use before + // scalarizing. + break; + } + case ISD::FSINCOS: { + // FIXME: Try to directly match vector case like fsincospi EVT VT = Node->getValueType(0).getVectorElementType(); - RTLIB::Libcall LC = Node->getOpcode() == ISD::FSINCOS - ? 
RTLIB::getSINCOS(VT) - : RTLIB::getSINCOSPI(VT); - if (DAG.expandMultipleResultFPLibCall(LC, Node, Results)) + RTLIB::Libcall LC = RTLIB::getSINCOS(VT); + if (DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT)) return; break; } case ISD::FMODF: { - RTLIB::Libcall LC = - RTLIB::getMODF(Node->getValueType(0).getVectorElementType()); - if (DAG.expandMultipleResultFPLibCall(LC, Node, Results, + EVT VT = Node->getValueType(0).getVectorElementType(); + RTLIB::Libcall LC = RTLIB::getMODF(VT); + if (DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT, /*CallRetResNo=*/0)) return; break; diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index f70b6cd..12fc26d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -340,7 +340,7 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, unsigned Idx = RegDefPos.GetIdx(); const MCInstrDesc &Desc = TII->get(Opcode); - const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI); + const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx); assert(RC && "Not a valid register class"); RegClass = RC->getID(); // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 80bbfea..b5d502b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2514,18 +2514,20 @@ static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode, bool SelectionDAG::expandMultipleResultFPLibCall( RTLIB::Libcall LC, SDNode *Node, SmallVectorImpl<SDValue> &Results, - std::optional<unsigned> CallRetResNo) { - LLVMContext &Ctx = *getContext(); - EVT VT = Node->getValueType(0); - unsigned NumResults = Node->getNumValues(); - + EVT CallVT, std::optional<unsigned> CallRetResNo) { if (LC == RTLIB::UNKNOWN_LIBCALL) return false; - const char *LCName = TLI->getLibcallName(LC); - if (!LCName) + EVT VT = Node->getValueType(0); + + RTLIB::LibcallImpl Impl = TLI->getLibcallImpl(LC); + if (Impl == RTLIB::Unsupported) return false; + StringRef LCName = TLI->getLibcallImplName(Impl); + + // FIXME: This should not use TargetLibraryInfo. There should be + // RTLIB::Libcall entries for each used vector type, and directly matched. auto getVecDesc = [&]() -> VecDesc const * { for (bool Masked : {false, true}) { if (VecDesc const *VD = getLibInfo().getVectorMappingInfo( @@ -2538,9 +2540,34 @@ bool SelectionDAG::expandMultipleResultFPLibCall( // For vector types, we must find a vector mapping for the libcall. VecDesc const *VD = nullptr; - if (VT.isVector() && !(VD = getVecDesc())) + if (VT.isVector() && !CallVT.isVector() && !(VD = getVecDesc())) return false; + bool IsMasked = (VD && VD->isMasked()) || + RTLIB::RuntimeLibcallsInfo::hasVectorMaskArgument(Impl); + + // This wrapper function exists because getVectorMappingInfo works in terms of + // function names instead of RTLIB enums. + + // FIXME: If we used a vector mapping, this assumes the calling convention of + // the vector function is the same as the scalar. + + StringRef Name = VD ? 
VD->getVectorFnName() : LCName; + + return expandMultipleResultFPLibCall(Name, + TLI->getLibcallImplCallingConv(Impl), + Node, Results, CallRetResNo, IsMasked); +} + +// FIXME: This belongs in TargetLowering +bool SelectionDAG::expandMultipleResultFPLibCall( + StringRef Name, CallingConv::ID CC, SDNode *Node, + SmallVectorImpl<SDValue> &Results, std::optional<unsigned> CallRetResNo, + bool IsMasked) { + LLVMContext &Ctx = *getContext(); + EVT VT = Node->getValueType(0); + unsigned NumResults = Node->getNumValues(); + // Find users of the node that store the results (and share input chains). The // destination pointers can be used instead of creating stack allocations. SDValue StoresInChain; @@ -2598,7 +2625,7 @@ bool SelectionDAG::expandMultipleResultFPLibCall( SDLoc DL(Node); // Pass the vector mask (if required). - if (VD && VD->isMasked()) { + if (IsMasked) { EVT MaskVT = TLI->getSetCCResultType(getDataLayout(), Ctx, VT); SDValue Mask = getBoolConstant(true, DL, MaskVT, VT); Args.emplace_back(Mask, MaskVT.getTypeForEVT(Ctx)); @@ -2608,11 +2635,11 @@ bool SelectionDAG::expandMultipleResultFPLibCall( ? Node->getValueType(*CallRetResNo).getTypeForEVT(Ctx) : Type::getVoidTy(Ctx); SDValue InChain = StoresInChain ? StoresInChain : getEntryNode(); - SDValue Callee = getExternalSymbol(VD ? VD->getVectorFnName().data() : LCName, - TLI->getPointerTy(getDataLayout())); + SDValue Callee = + getExternalSymbol(Name.data(), TLI->getPointerTy(getDataLayout())); TargetLowering::CallLoweringInfo CLI(*this); - CLI.setDebugLoc(DL).setChain(InChain).setLibCallee( - TLI->getLibcallCallingConv(LC), RetType, Callee, std::move(Args)); + CLI.setDebugLoc(DL).setChain(InChain).setLibCallee(CC, RetType, Callee, + std::move(Args)); auto [Call, CallChain] = TLI->LowerCallTo(CLI); @@ -2920,6 +2947,34 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { return MaskedValueIsZero(Op, APInt::getSignMask(BitWidth), Depth); } +bool SelectionDAG::SignBitIsZeroFP(SDValue Op, unsigned Depth) const { + if (Depth >= MaxRecursionDepth) + return false; // Limit search depth. + + unsigned Opc = Op.getOpcode(); + switch (Opc) { + case ISD::FABS: + return true; + case ISD::AssertNoFPClass: { + FPClassTest NoFPClass = + static_cast<FPClassTest>(Op.getConstantOperandVal(1)); + + const FPClassTest TestMask = fcNan | fcNegative; + return (NoFPClass & TestMask) == TestMask; + } + case ISD::ARITH_FENCE: + return SignBitIsZeroFP(Op, Depth + 1); + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FEXP10: + return Op->getFlags().hasNoNaNs(); + default: + return false; + } + + llvm_unreachable("covered opcode switch"); +} + /// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use /// this predicate to simplify operations downstream. Mask is known to be zero /// for bits that V cannot have. @@ -4121,6 +4176,25 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known.One.clearLowBits(LogOfAlign); break; } + case ISD::AssertNoFPClass: { + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + + FPClassTest NoFPClass = + static_cast<FPClassTest>(Op.getConstantOperandVal(1)); + const FPClassTest NegativeTestMask = fcNan | fcNegative; + if ((NoFPClass & NegativeTestMask) == NegativeTestMask) { + // Cannot be negative. + Known.makeNonNegative(); + } + + const FPClassTest PositiveTestMask = fcNan | fcPositive; + if ((NoFPClass & PositiveTestMask) == PositiveTestMask) { + // Cannot be positive. 
+ Known.makeNegative(); + } + + break; + } case ISD::FGETSIGN: // All bits are zero except the low bit. Known.Zero.setBitsFrom(1); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 88b0809..6a9022d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4638,6 +4638,12 @@ static std::optional<ConstantRange> getRange(const Instruction &I) { return std::nullopt; } +static FPClassTest getNoFPClass(const Instruction &I) { + if (const auto *CB = dyn_cast<CallBase>(&I)) + return CB->getRetNoFPClass(); + return fcNone; +} + void SelectionDAGBuilder::visitLoad(const LoadInst &I) { if (I.isAtomic()) return visitAtomicLoad(I); @@ -9132,6 +9138,7 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee, if (Result.first.getNode()) { Result.first = lowerRangeToAssertZExt(DAG, CB, Result.first); + Result.first = lowerNoFPClassToAssertNoFPClass(DAG, CB, Result.first); setValue(&CB, Result.first); } @@ -10718,6 +10725,16 @@ SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG, return DAG.getMergeValues(Ops, SL); } +SDValue SelectionDAGBuilder::lowerNoFPClassToAssertNoFPClass( + SelectionDAG &DAG, const Instruction &I, SDValue Op) { + FPClassTest Classes = getNoFPClass(I); + if (Classes == fcNone) + return Op; + + return DAG.getNode(ISD::AssertNoFPClass, SDLoc(Op), Op.getValueType(), Op, + DAG.getTargetConstant(Classes, SDLoc(), MVT::i32)); +} + /// Populate a CallLoweringInfo (into \p CLI) based on the properties of /// the call being lowered. /// diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index ed63bee..13e2daa 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -429,6 +429,10 @@ public: SDValue lowerRangeToAssertZExt(SelectionDAG &DAG, const Instruction &I, SDValue Op); + // Lower nofpclass attributes to AssertNoFPClass + SDValue lowerNoFPClassToAssertNoFPClass(SelectionDAG &DAG, + const Instruction &I, SDValue Op); + void populateCallLoweringInfo(TargetLowering::CallLoweringInfo &CLI, const CallBase *Call, unsigned ArgIdx, unsigned NumArgs, SDValue Callee, diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 8bc5d2f..e78dfb1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -2448,7 +2448,7 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, // a cycle in the scheduling graph. // If the node has glue, walk down the graph to the "lowest" node in the - // glueged set. + // glued set. EVT VT = Root->getValueType(Root->getNumValues()-1); while (VT == MVT::Glue) { SDNode *GU = Root->getGluedUser(); diff --git a/llvm/lib/CodeGen/SplitKit.cpp b/llvm/lib/CodeGen/SplitKit.cpp index f9ecb2c..8ec4bfb 100644 --- a/llvm/lib/CodeGen/SplitKit.cpp +++ b/llvm/lib/CodeGen/SplitKit.cpp @@ -1509,10 +1509,9 @@ void SplitEditor::forceRecomputeVNI(const VNInfo &ParentVNI) { } // Trace value through phis. - SmallPtrSet<const VNInfo *, 8> Visited; ///< whether VNI was/is in worklist. - SmallVector<const VNInfo *, 4> WorkList; - Visited.insert(&ParentVNI); - WorkList.push_back(&ParentVNI); + ///< whether VNI was/is in worklist. 
+ SmallPtrSet<const VNInfo *, 8> Visited = {&ParentVNI}; + SmallVector<const VNInfo *, 4> WorkList = {&ParentVNI}; const LiveInterval &ParentLI = Edit->getParent(); const SlotIndexes &Indexes = *LIS.getSlotIndexes(); diff --git a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp index 70c3b2c..ebf6d1a 100644 --- a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -198,7 +198,7 @@ void TargetFrameLowering::spillCalleeSavedRegister( } else { const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); TII->storeRegToStackSlot(SaveBlock, MI, Reg, true, CS.getFrameIdx(), RC, - TRI, Register()); + Register()); } } @@ -212,8 +212,7 @@ void TargetFrameLowering::restoreCalleeSavedRegister( .addReg(CS.getDstReg(), getKillRegState(true)); } else { const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII->loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI, - Register()); + TII->loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, Register()); assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!"); } } diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp index 3c41bbe..d503d7a 100644 --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -58,9 +58,8 @@ static cl::opt<unsigned int> MaxAccumulatorWidth( TargetInstrInfo::~TargetInstrInfo() = default; -const TargetRegisterClass * -TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum, - const TargetRegisterInfo *TRI) const { +const TargetRegisterClass *TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, + unsigned OpNum) const { if (OpNum >= MCID.getNumOperands()) return nullptr; @@ -69,14 +68,14 @@ TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum, // TODO: Remove isLookupPtrRegClass in favor of isLookupRegClassByHwMode if (OpInfo.isLookupPtrRegClass()) - return TRI->getPointerRegClass(RegClass); + return TRI.getPointerRegClass(RegClass); // Instructions like INSERT_SUBREG do not have fixed register classes. if (RegClass < 0) return nullptr; // Otherwise just look it up normally. - return TRI->getRegClass(RegClass); + return TRI.getRegClass(RegClass); } /// insertNoop - Insert a noop into the instruction stream at the specified @@ -223,13 +222,11 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI, // %1.sub = INST %1.sub(tied), %0.sub, implicit-def %1 SmallVector<unsigned> UpdateImplicitDefIdx; if (HasDef && MI.hasImplicitDef()) { - const TargetRegisterInfo *TRI = - MI.getMF()->getSubtarget().getRegisterInfo(); for (auto [OpNo, MO] : llvm::enumerate(MI.implicit_operands())) { Register ImplReg = MO.getReg(); if ((ImplReg.isVirtual() && ImplReg == Reg0) || (ImplReg.isPhysical() && Reg0.isPhysical() && - TRI->isSubRegisterEq(ImplReg, Reg0))) + TRI.isSubRegisterEq(ImplReg, Reg0))) UpdateImplicitDefIdx.push_back(OpNo + MI.getNumExplicitOperands()); } } @@ -425,28 +422,27 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC, unsigned SubIdx, unsigned &Size, unsigned &Offset, const MachineFunction &MF) const { - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!SubIdx) { - Size = TRI->getSpillSize(*RC); + Size = TRI.getSpillSize(*RC); Offset = 0; return true; } - unsigned BitSize = TRI->getSubRegIdxSize(SubIdx); + unsigned BitSize = TRI.getSubRegIdxSize(SubIdx); // Convert bit size to byte size. 
if (BitSize % 8) return false; - int BitOffset = TRI->getSubRegIdxOffset(SubIdx); + int BitOffset = TRI.getSubRegIdxOffset(SubIdx); if (BitOffset < 0 || BitOffset % 8) return false; Size = BitSize / 8; Offset = (unsigned)BitOffset / 8; - assert(TRI->getSpillSize(*RC) >= (Offset + Size) && "bad subregister range"); + assert(TRI.getSpillSize(*RC) >= (Offset + Size) && "bad subregister range"); if (!MF.getDataLayout().isLittleEndian()) { - Offset = TRI->getSpillSize(*RC) - (Offset + Size); + Offset = TRI.getSpillSize(*RC) - (Offset + Size); } return true; } @@ -454,8 +450,7 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC, void TargetInstrInfo::reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg, unsigned SubIdx, - const MachineInstr &Orig, - const TargetRegisterInfo &TRI) const { + const MachineInstr &Orig) const { MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig); MI->substituteRegister(MI->getOperand(0).getReg(), DestReg, SubIdx, TRI); MBB.insert(I, MI); @@ -726,7 +721,6 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, // actual load size is. int64_t MemSize = 0; const MachineFrameInfo &MFI = MF.getFrameInfo(); - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (Flags & MachineMemOperand::MOStore) { MemSize = MFI.getObjectSize(FI); @@ -735,7 +729,7 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, int64_t OpSize = MFI.getObjectSize(FI); if (auto SubReg = MI.getOperand(OpIdx).getSubReg()) { - unsigned SubRegSize = TRI->getSubRegIdxSize(SubReg); + unsigned SubRegSize = TRI.getSubRegIdxSize(SubReg); if (SubRegSize > 0 && !(SubRegSize % 8)) OpSize = SubRegSize / 8; } @@ -800,11 +794,11 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, // code. BuildMI(*MBB, Pos, MI.getDebugLoc(), get(TargetOpcode::KILL)).add(MO); } else { - storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI, + storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, Register()); } } else - loadRegFromStackSlot(*MBB, Pos, MO.getReg(), FI, RC, TRI, Register()); + loadRegFromStackSlot(*MBB, Pos, MO.getReg(), FI, RC, Register()); return &*--Pos; } @@ -880,8 +874,8 @@ static void transferImplicitOperands(MachineInstr *MI, } } -void TargetInstrInfo::lowerCopy(MachineInstr *MI, - const TargetRegisterInfo *TRI) const { +void TargetInstrInfo::lowerCopy( + MachineInstr *MI, const TargetRegisterInfo * /*Remove me*/) const { if (MI->allDefsAreDead()) { MI->setDesc(get(TargetOpcode::KILL)); return; @@ -911,7 +905,7 @@ void TargetInstrInfo::lowerCopy(MachineInstr *MI, SrcMO.getReg().isPhysical() ? 
SrcMO.isRenamable() : false); if (MI->getNumOperands() > 2) - transferImplicitOperands(MI, TRI); + transferImplicitOperands(MI, &TRI); MI->eraseFromParent(); } @@ -1327,8 +1321,7 @@ void TargetInstrInfo::reassociateOps( MachineFunction *MF = Root.getMF(); MachineRegisterInfo &MRI = MF->getRegInfo(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); - const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); - const TargetRegisterClass *RC = Root.getRegClassConstraint(0, TII, TRI); + const TargetRegisterClass *RC = Root.getRegClassConstraint(0, TII, &TRI); MachineOperand &OpA = Prev.getOperand(OperandIndices[1]); MachineOperand &OpB = Root.getOperand(OperandIndices[2]); @@ -1337,9 +1330,12 @@ void TargetInstrInfo::reassociateOps( MachineOperand &OpC = Root.getOperand(0); Register RegA = OpA.getReg(); + unsigned SubRegA = OpA.getSubReg(); Register RegB = OpB.getReg(); Register RegX = OpX.getReg(); + unsigned SubRegX = OpX.getSubReg(); Register RegY = OpY.getReg(); + unsigned SubRegY = OpY.getSubReg(); Register RegC = OpC.getReg(); if (RegA.isVirtual()) @@ -1357,6 +1353,7 @@ void TargetInstrInfo::reassociateOps( // recycling RegB because the MachineCombiner's computation of the critical // path requires a new register definition rather than an existing one. Register NewVR = MRI.createVirtualRegister(RC); + unsigned SubRegNewVR = 0; InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); auto [NewRootOpc, NewPrevOpc] = getReassociationOpcodes(Pattern, Root, Prev); @@ -1369,6 +1366,7 @@ void TargetInstrInfo::reassociateOps( if (SwapPrevOperands) { std::swap(RegX, RegY); + std::swap(SubRegX, SubRegY); std::swap(KillX, KillY); } @@ -1421,9 +1419,9 @@ void TargetInstrInfo::reassociateOps( if (Idx == 0) continue; if (Idx == PrevFirstOpIdx) - MIB1.addReg(RegX, getKillRegState(KillX)); + MIB1.addReg(RegX, getKillRegState(KillX), SubRegX); else if (Idx == PrevSecondOpIdx) - MIB1.addReg(RegY, getKillRegState(KillY)); + MIB1.addReg(RegY, getKillRegState(KillY), SubRegY); else MIB1.add(MO); } @@ -1431,6 +1429,7 @@ void TargetInstrInfo::reassociateOps( if (SwapRootOperands) { std::swap(RegA, NewVR); + std::swap(SubRegA, SubRegNewVR); std::swap(KillA, KillNewVR); } @@ -1442,9 +1441,9 @@ void TargetInstrInfo::reassociateOps( if (Idx == 0) continue; if (Idx == RootFirstOpIdx) - MIB2 = MIB2.addReg(RegA, getKillRegState(KillA)); + MIB2 = MIB2.addReg(RegA, getKillRegState(KillA), SubRegA); else if (Idx == RootSecondOpIdx) - MIB2 = MIB2.addReg(NewVR, getKillRegState(KillNewVR)); + MIB2 = MIB2.addReg(NewVR, getKillRegState(KillNewVR), SubRegNewVR); else MIB2 = MIB2.add(MO); } @@ -1532,6 +1531,7 @@ void TargetInstrInfo::genAlternativeCodeSequence( if (IndexedReg.index() == 0) continue; + // FIXME: Losing subregisters MachineInstr *Instr = MRI.getUniqueVRegDef(IndexedReg.value()); MachineInstrBuilder MIB; Register AccReg; @@ -1704,8 +1704,7 @@ bool TargetInstrInfo::isSchedulingBoundary(const MachineInstr &MI, // stack slot reference to depend on the instruction that does the // modification. 
const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering(); - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - return MI.modifiesRegister(TLI.getStackPointerRegisterToSaveRestore(), TRI); + return MI.modifiesRegister(TLI.getStackPointerRegisterToSaveRestore(), &TRI); } // Provide a global flag for disabling the PreRA hazard recognizer that targets @@ -1738,11 +1737,11 @@ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, // Default implementation of getMemOperandWithOffset. bool TargetInstrInfo::getMemOperandWithOffset( const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, - bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const { + bool &OffsetIsScalable, const TargetRegisterInfo * /*RemoveMe*/) const { SmallVector<const MachineOperand *, 4> BaseOps; LocationSize Width = LocationSize::precise(0); if (!getMemOperandsWithOffsetWidth(MI, BaseOps, Offset, OffsetIsScalable, - Width, TRI) || + Width, &TRI) || BaseOps.size() != 1) return false; BaseOp = BaseOps.front(); @@ -1863,7 +1862,6 @@ std::optional<ParamLoadedValue> TargetInstrInfo::describeLoadedValue(const MachineInstr &MI, Register Reg) const { const MachineFunction *MF = MI.getMF(); - const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); DIExpression *Expr = DIExpression::get(MF->getFunction().getContext(), {}); int64_t Offset; bool OffsetIsScalable; @@ -1894,7 +1892,6 @@ TargetInstrInfo::describeLoadedValue(const MachineInstr &MI, // Only describe memory which provably does not escape the function. As // described in llvm.org/PR43343, escaped memory may be clobbered by the // callee (or by another thread). - const auto &TII = MF->getSubtarget().getInstrInfo(); const MachineFrameInfo &MFI = MF->getFrameInfo(); const MachineMemOperand *MMO = MI.memoperands()[0]; const PseudoSourceValue *PSV = MMO->getPseudoValue(); @@ -1905,8 +1902,7 @@ TargetInstrInfo::describeLoadedValue(const MachineInstr &MI, return std::nullopt; const MachineOperand *BaseOp; - if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, - TRI)) + if (!getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, &TRI)) return std::nullopt; // FIXME: Scalable offsets are not yet handled in the offset code below. @@ -2045,7 +2041,7 @@ bool TargetInstrInfo::getInsertSubregInputs( // Returns a MIRPrinter comment for this machine operand. 
std::string TargetInstrInfo::createMIROperandComment( const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx, - const TargetRegisterInfo *TRI) const { + const TargetRegisterInfo * /*RemoveMe*/) const { if (!MI.isInlineAsm()) return ""; @@ -2078,12 +2074,8 @@ std::string TargetInstrInfo::createMIROperandComment( OS << F.getKindName(); unsigned RCID; - if (!F.isImmKind() && !F.isMemKind() && F.hasRegClassConstraint(RCID)) { - if (TRI) { - OS << ':' << TRI->getRegClassName(TRI->getRegClass(RCID)); - } else - OS << ":RC" << RCID; - } + if (!F.isImmKind() && !F.isMemKind() && F.hasRegClassConstraint(RCID)) + OS << ':' << TRI.getRegClassName(TRI.getRegClass(RCID)); if (F.isMemKind()) { InlineAsm::ConstraintCode MCID = F.getMemoryConstraintID(); diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 1cc591c..814b4b5 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -430,6 +430,24 @@ RTLIB::Libcall RTLIB::getSINCOS(EVT RetVT) { } RTLIB::Libcall RTLIB::getSINCOSPI(EVT RetVT) { + // TODO: Tablegen should generate this function + if (RetVT.isVector()) { + if (!RetVT.isSimple()) + return RTLIB::UNKNOWN_LIBCALL; + switch (RetVT.getSimpleVT().SimpleTy) { + case MVT::v4f32: + return RTLIB::SINCOSPI_V4F32; + case MVT::v2f64: + return RTLIB::SINCOSPI_V2F64; + case MVT::nxv4f32: + return RTLIB::SINCOSPI_NXV4F32; + case MVT::nxv2f64: + return RTLIB::SINCOSPI_NXV2F64; + default: + return RTLIB::UNKNOWN_LIBCALL; + } + } + return getFPLibCall(RetVT, SINCOSPI_F32, SINCOSPI_F64, SINCOSPI_F80, SINCOSPI_F128, SINCOSPI_PPCF128); } diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index b99e1c7..3f2961c 100644 --- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1402,7 +1402,7 @@ bool TwoAddressInstructionImpl::tryInstructionTransform( // Unfold the load. LLVM_DEBUG(dbgs() << "2addr: UNFOLDING: " << MI); const TargetRegisterClass *RC = TRI->getAllocatableClass( - TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI)); + TII->getRegClass(UnfoldMCID, LoadRegIndex)); Register Reg = MRI->createVirtualRegister(RC); SmallVector<MachineInstr *, 2> NewMIs; if (!TII->unfoldMemoryOperand(*MF, MI, Reg, |

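A note on the new visitSRA fold above, (sra (xor (sra x, c1), -1), c2) -> (xor (sra x, c3), -1): it is sound because bitwise NOT commutes with arithmetic shifts; both operations just replicate or flip the sign bit, so shifting a NOT-ed value equals NOT-ing the shifted value. A tiny standalone check of the identity (illustrative only; relies on signed >> being an arithmetic shift, which C++20 guarantees):

  #include <cassert>
  #include <cstdint>

  int main() {
    // (~(x >> c1)) >> c2 == ~(x >> (c1 + c2)) for arithmetic shifts.
    for (int32_t x : {-76, 76, INT32_MIN, INT32_MAX, 0})
      assert(((~(x >> 3)) >> 2) == ~(x >> 5));
    return 0;
  }

The m_OneUse guards in the hunk keep the combine from duplicating the inner sra when it has other users, and getLimitedValue clamps c1 + c2 to bitwidth - 1, which is safe because arithmetic shifts saturate to all sign bits at that amount.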