Diffstat (limited to 'llvm/lib')
164 files changed, 1650 insertions, 1402 deletions
diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt index 88ebd65..bff9b62 100644 --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -89,7 +89,6 @@ add_llvm_component_library(LLVMAnalysis InlineCost.cpp InlineAdvisor.cpp InlineOrder.cpp - InlineSizeEstimatorAnalysis.cpp InstCount.cpp InstructionPrecedenceTracking.cpp InstructionSimplify.cpp diff --git a/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp b/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp index 67e38ab..d2be805 100644 --- a/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp +++ b/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp @@ -16,7 +16,6 @@ #include "llvm/ADT/BitVector.h" #include "llvm/Analysis/CallGraph.h" -#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h" #include "llvm/Analysis/MLInlineAdvisor.h" #include "llvm/Analysis/ModelUnderTrainingRunner.h" #include "llvm/Analysis/NoInferenceModelRunner.h" @@ -89,9 +88,6 @@ struct InlineEvent { /// error, even if AdvisedDecision were true, otherwise it agrees with /// AdvisedDecision. bool Effect = false; - - /// What the change in size was: size_after - size_before - int64_t Reward = 0; }; /// Collect data we may use for training a model. @@ -150,31 +146,15 @@ public: GetModelRunner, std::function<bool(CallBase &)> GetDefaultAdvice); - size_t getTotalSizeEstimate(); - - void updateNativeSizeEstimate(int64_t Change) { - *CurrentNativeSize += Change; - } - void resetNativeSize(Function *F) { - PreservedAnalyses PA = PreservedAnalyses::all(); - PA.abandon<InlineSizeEstimatorAnalysis>(); - FAM.invalidate(*F, PA); - } - std::unique_ptr<MLInlineAdvice> getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override; - std::optional<size_t> getNativeSizeEstimate(const Function &F) const; - private: bool isLogging() const { return !!Logger; } std::unique_ptr<MLInlineAdvice> getMandatoryAdviceImpl(CallBase &CB) override; const bool IsDoingInference; std::unique_ptr<TrainingLogger> Logger; - - const std::optional<int32_t> InitialNativeSize; - std::optional<int32_t> CurrentNativeSize; }; /// A variant of MLInlineAdvice that tracks all non-trivial inlining @@ -183,13 +163,9 @@ class LoggingMLInlineAdvice : public MLInlineAdvice { public: LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB, OptimizationRemarkEmitter &ORE, bool Recommendation, - TrainingLogger &Logger, - std::optional<size_t> CallerSizeEstimateBefore, - std::optional<size_t> CalleeSizeEstimateBefore, - bool DefaultDecision, bool Mandatory = false) + TrainingLogger &Logger, bool DefaultDecision, + bool Mandatory = false) : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger), - CallerSizeEstimateBefore(CallerSizeEstimateBefore), - CalleeSizeEstimateBefore(CalleeSizeEstimateBefore), DefaultDecision(DefaultDecision), Mandatory(Mandatory) {} virtual ~LoggingMLInlineAdvice() = default; @@ -200,59 +176,35 @@ private: } void recordInliningImpl() override { MLInlineAdvice::recordInliningImpl(); - getAdvisor()->resetNativeSize(Caller); - int Reward = std::numeric_limits<int>::max(); - if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() && - !getAdvisor()->isForcedToStop()) { - int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller) + - *CalleeSizeEstimateBefore; - Reward = NativeSizeAfter - - (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore); - getAdvisor()->updateNativeSizeEstimate(Reward); - } - log(Reward, /*Success=*/true); + log(/*Success=*/true); } void recordInliningWithCalleeDeletedImpl() 
override { MLInlineAdvice::recordInliningWithCalleeDeletedImpl(); - getAdvisor()->resetNativeSize(Caller); - if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() && - !getAdvisor()->isForcedToStop()) { - int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller); - int Reward = NativeSizeAfter - - (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore); - getAdvisor()->updateNativeSizeEstimate(Reward); - log(Reward, /*Success=*/true); - } else { - log(NoReward, /*Success=*/true); - } + log(/*Success=*/true); } void recordUnsuccessfulInliningImpl(const InlineResult &Result) override { MLInlineAdvice::recordUnsuccessfulInliningImpl(Result); - log(NoReward, /*Success=*/false); + log(/*Success=*/false); } void recordUnattemptedInliningImpl() override { MLInlineAdvice::recordUnattemptedInliningImpl(); - log(NoReward, /*Success=*/false); + log(/*Success=*/false); } - void log(int64_t Reward, bool Success) { + void log(bool Success) { if (Mandatory) return; InlineEvent Event; Event.AdvisedDecision = isInliningRecommended(); Event.DefaultDecision = DefaultDecision; Event.Effect = Success; - Event.Reward = Reward; Logger.logInlineEvent(Event, getAdvisor()->getModelRunner()); } - static const int64_t NoReward = 0; TrainingLogger &Logger; - const std::optional<size_t> CallerSizeEstimateBefore; - const std::optional<size_t> CalleeSizeEstimateBefore; const int64_t DefaultDecision; const int64_t Mandatory; }; @@ -296,9 +248,9 @@ TrainingLogger::TrainingLogger(StringRef LogFileName, if (EC) dbgs() << (EC.message() + ":" + TrainingLog); - L = std::make_unique<Logger>( - std::move(OS), FT, TensorSpec::createSpec<int64_t>(RewardName, {1}), - InlineSizeEstimatorAnalysis::isEvaluatorRequested()); + L = std::make_unique<Logger>(std::move(OS), FT, + TensorSpec::createSpec<int64_t>(RewardName, {1}), + false); L->switchContext(""); } @@ -326,8 +278,6 @@ void TrainingLogger::logInlineEvent(const InlineEvent &Event, L->logTensorValue(DecisionPos, reinterpret_cast<const char *>(&Event.AdvisedDecision)); L->endObservation(); - if (InlineSizeEstimatorAnalysis::isEvaluatorRequested()) - L->logReward(Event.Reward); // For debugging / later use Effects.push_back(Event.Effect); @@ -340,9 +290,7 @@ DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor( GetModelRunner, std::function<bool(CallBase &)> GetDefaultAdvice) : MLInlineAdvisor(M, MAM, GetModelRunner, GetDefaultAdvice), - IsDoingInference(isa<ModelUnderTrainingRunner>(getModelRunner())), - InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0), - CurrentNativeSize(InitialNativeSize) { + IsDoingInference(isa<ModelUnderTrainingRunner>(getModelRunner())) { // We cannot have the case of neither inference nor logging. 
if (!TrainingLog.empty()) Logger = std::make_unique<TrainingLogger>( @@ -351,29 +299,12 @@ DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor( assert(IsDoingInference || isLogging()); } -std::optional<size_t> -DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const { - if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested()) - return std::nullopt; - auto &R = - FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F)); - if (!R) { - F.getParent()->getContext().emitError( - "Native size estimator is not present."); - return 0; - } - return *R; -} - std::unique_ptr<MLInlineAdvice> DevelopmentModeMLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) { return std::make_unique<LoggingMLInlineAdvice>( /*Advisor=*/this, /*CB=*/CB, /*ORE=*/getCallerORE(CB), /*Recommendation=*/true, /*Logger=*/*Logger, - /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()), - /*CalleeSizeEstimateBefore=*/ - getNativeSizeEstimate(*CB.getCalledFunction()), /*DefaultDecision=*/true, /*Mandatory*/ true); } @@ -391,24 +322,9 @@ DevelopmentModeMLInlineAdvisor::getAdviceFromModel( /*Advisor=*/this, /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation, /*Logger=*/*Logger, - /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()), - /*CalleeSizeEstimateBefore=*/ - getNativeSizeEstimate(*CB.getCalledFunction()), /*DefaultDecision=*/DefaultAdvice); } -size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() { - if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested()) - return 0; - size_t Ret = 0; - for (auto &F : M) { - if (F.isDeclaration()) - continue; - Ret += *getNativeSizeEstimate(F); - } - return Ret; -} - std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor( Module &M, ModuleAnalysisManager &MAM, std::function<bool(CallBase &)> GetDefaultAdvice) { diff --git a/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp b/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp deleted file mode 100644 index fc635726a..0000000 --- a/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp +++ /dev/null @@ -1,281 +0,0 @@ -//===- InlineSizeEstimatorAnalysis.cpp - IR to native size from ML model --===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This implements feature and label extraction for offline supervised learning -// of a IR to native size model. 
-// -//===----------------------------------------------------------------------===// -#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h" - -#ifdef LLVM_HAVE_TFLITE -#include "llvm/Analysis/Utils/TFUtils.h" -#endif -#include "llvm/IR/Function.h" -#include "llvm/IR/PassManager.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -AnalysisKey InlineSizeEstimatorAnalysis::Key; - -#ifdef LLVM_HAVE_TFLITE -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Instructions.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/CommandLine.h" -#include <algorithm> -#include <deque> -#include <optional> - -static cl::opt<std::string> TFIR2NativeModelPath( - "ml-inliner-ir2native-model", cl::Hidden, - cl::desc("Path to saved model evaluating native size from IR.")); - -#define DEBUG_TYPE "inline-size-estimator" -namespace { -unsigned getMaxInstructionID() { -#define LAST_OTHER_INST(NR) return NR; -#include "llvm/IR/Instruction.def" -} - -class IRToNativeSizeLearning { -public: - enum class NamedFeatureIndex : size_t { - InitialSize, - Blocks, - Calls, - IsLocal, - IsLinkOnceODR, - IsLinkOnce, - Loops, - MaxLoopDepth, - MaxDomTreeLevel, - - NumNamedFeatures - }; - static const size_t NumNamedFeatures = - static_cast<size_t>(NamedFeatureIndex::NumNamedFeatures); - struct FunctionFeatures { - static const size_t FeatureCount; - - std::array<int32_t, NumNamedFeatures> NamedFeatures = {0}; - std::vector<int32_t> InstructionHistogram; - std::vector<int32_t> InstructionPairHistogram; - - void fillTensor(int32_t *Ptr) const; - int32_t &operator[](NamedFeatureIndex Pos) { - return NamedFeatures[static_cast<size_t>(Pos)]; - } - }; - IRToNativeSizeLearning() = default; - - static FunctionFeatures getFunctionFeatures(Function &F, - FunctionAnalysisManager &FAM); -}; - -// This is a point in time - we determined including these pairs of -// consecutive instructions (in the IR layout available at inline time) as -// features improves the model performance. We want to move away from manual -// feature selection. -// The array is given in opcode pairs rather than labels because 1) labels -// weren't readily available, and 2) the successions were hand - extracted. -// -// This array must be sorted. 
-static const std::array<std::pair<size_t, size_t>, 137> - ImportantInstructionSuccessions{ - {{1, 1}, {1, 4}, {1, 5}, {1, 7}, {1, 8}, {1, 9}, {1, 11}, - {1, 12}, {1, 13}, {1, 14}, {1, 18}, {1, 20}, {1, 22}, {1, 24}, - {1, 25}, {1, 26}, {1, 27}, {1, 28}, {1, 29}, {1, 30}, {1, 31}, - {1, 32}, {1, 33}, {1, 34}, {1, 39}, {1, 40}, {1, 42}, {1, 45}, - {2, 1}, {2, 2}, {2, 13}, {2, 28}, {2, 29}, {2, 32}, {2, 33}, - {2, 34}, {2, 38}, {2, 48}, {2, 49}, {2, 53}, {2, 55}, {2, 56}, - {13, 2}, {13, 13}, {13, 26}, {13, 33}, {13, 34}, {13, 56}, {15, 27}, - {28, 2}, {28, 48}, {28, 53}, {29, 2}, {29, 33}, {29, 56}, {31, 31}, - {31, 33}, {31, 34}, {31, 49}, {32, 1}, {32, 2}, {32, 13}, {32, 15}, - {32, 28}, {32, 29}, {32, 32}, {32, 33}, {32, 34}, {32, 39}, {32, 40}, - {32, 48}, {32, 49}, {32, 53}, {32, 56}, {33, 1}, {33, 2}, {33, 32}, - {33, 33}, {33, 34}, {33, 49}, {33, 53}, {33, 56}, {34, 1}, {34, 2}, - {34, 32}, {34, 33}, {34, 34}, {34, 49}, {34, 53}, {34, 56}, {38, 34}, - {39, 57}, {40, 34}, {47, 15}, {47, 49}, {48, 2}, {48, 34}, {48, 56}, - {49, 1}, {49, 2}, {49, 28}, {49, 32}, {49, 33}, {49, 34}, {49, 39}, - {49, 49}, {49, 56}, {53, 1}, {53, 2}, {53, 28}, {53, 34}, {53, 53}, - {53, 57}, {55, 1}, {55, 28}, {55, 34}, {55, 53}, {55, 55}, {55, 56}, - {56, 1}, {56, 2}, {56, 7}, {56, 13}, {56, 32}, {56, 33}, {56, 34}, - {56, 49}, {56, 53}, {56, 56}, {56, 64}, {57, 34}, {57, 56}, {57, 57}, - {64, 1}, {64, 64}, {65, 1}, {65, 65}}}; - -// We have: 9 calculated features (the features here); 1 feature for each -// instruction opcode; and 1 feature for each manually-identified sequence. -// For the latter 2, we build a histogram: we count the number of -// occurrences of each instruction opcode or succession of instructions, -// respectively. -// Note that instruction opcodes start from 1. For convenience, we also have an -// always 0 feature for the '0' opcode, hence the extra 1. 
-const size_t IRToNativeSizeLearning::FunctionFeatures::FeatureCount = - ImportantInstructionSuccessions.size() + getMaxInstructionID() + 1 + - IRToNativeSizeLearning::NumNamedFeatures; - -size_t getSize(Function &F, TargetTransformInfo &TTI) { - size_t Ret = 0; - for (const auto &BB : F) - for (const auto &I : BB) - Ret += TTI.getInstructionCost( - &I, TargetTransformInfo::TargetCostKind::TCK_CodeSize) - .getValue(); - return Ret; -} - -size_t getSize(Function &F, FunctionAnalysisManager &FAM) { - auto &TTI = FAM.getResult<TargetIRAnalysis>(F); - return getSize(F, TTI); -} - -unsigned getMaxDominatorTreeDepth(const Function &F, - const DominatorTree &Tree) { - unsigned Ret = 0; - for (const auto &BB : F) - if (const auto *TN = Tree.getNode(&BB)) - Ret = std::max(Ret, TN->getLevel()); - return Ret; -} -} // namespace - -IRToNativeSizeLearning::FunctionFeatures -IRToNativeSizeLearning::getFunctionFeatures(Function &F, - FunctionAnalysisManager &FAM) { - assert(llvm::is_sorted(ImportantInstructionSuccessions) && - "expected function features are sorted"); - - auto &DomTree = FAM.getResult<DominatorTreeAnalysis>(F); - FunctionFeatures FF; - size_t InstrCount = getMaxInstructionID() + 1; - FF.InstructionHistogram.resize(InstrCount); - - FF.InstructionPairHistogram.resize(ImportantInstructionSuccessions.size()); - - int StartID = 0; - int LastID = StartID; - auto getPairIndex = [](size_t a, size_t b) { - auto I = llvm::find(ImportantInstructionSuccessions, std::make_pair(a, b)); - if (I == ImportantInstructionSuccessions.end()) - return -1; - return static_cast<int>( - std::distance(ImportantInstructionSuccessions.begin(), I)); - }; - - // We don't want debug calls, because they'd just add noise. - for (const auto &BB : F) { - for (const auto &I : BB.instructionsWithoutDebug()) { - auto ID = I.getOpcode(); - - ++FF.InstructionHistogram[ID]; - int PairIndex = getPairIndex(LastID, ID); - if (PairIndex >= 0) - ++FF.InstructionPairHistogram[PairIndex]; - LastID = ID; - if (isa<CallBase>(I)) - ++FF[NamedFeatureIndex::Calls]; - } - } - - FF[NamedFeatureIndex::InitialSize] = getSize(F, FAM); - FF[NamedFeatureIndex::IsLocal] = F.hasLocalLinkage(); - FF[NamedFeatureIndex::IsLinkOnceODR] = F.hasLinkOnceODRLinkage(); - FF[NamedFeatureIndex::IsLinkOnce] = F.hasLinkOnceLinkage(); - FF[NamedFeatureIndex::Blocks] = F.size(); - auto &LI = FAM.getResult<LoopAnalysis>(F); - FF[NamedFeatureIndex::Loops] = std::distance(LI.begin(), LI.end()); - for (auto &L : LI) - FF[NamedFeatureIndex::MaxLoopDepth] = - std::max(FF[NamedFeatureIndex::MaxLoopDepth], - static_cast<int32_t>(L->getLoopDepth())); - FF[NamedFeatureIndex::MaxDomTreeLevel] = getMaxDominatorTreeDepth(F, DomTree); - return FF; -} - -void IRToNativeSizeLearning::FunctionFeatures::fillTensor(int32_t *Ptr) const { - std::copy(NamedFeatures.begin(), NamedFeatures.end(), Ptr); - Ptr += NamedFeatures.size(); - std::copy(InstructionHistogram.begin(), InstructionHistogram.end(), Ptr); - Ptr += InstructionHistogram.size(); - std::copy(InstructionPairHistogram.begin(), InstructionPairHistogram.end(), - Ptr); -} - -bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { - return !TFIR2NativeModelPath.empty(); -} - -InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() { - if (!isEvaluatorRequested()) { - return; - } - std::vector<TensorSpec> InputSpecs{TensorSpec::createSpec<int32_t>( - "serving_default_input_1", - {1, static_cast<int64_t>( - IRToNativeSizeLearning::FunctionFeatures::FeatureCount)})}; - std::vector<TensorSpec> OutputSpecs{ - 
TensorSpec::createSpec<float>("StatefulPartitionedCall", {1})}; - Evaluator = std::make_unique<TFModelEvaluator>( - TFIR2NativeModelPath.getValue().c_str(), InputSpecs, OutputSpecs); - if (!Evaluator || !Evaluator->isValid()) { - Evaluator.reset(); - return; - } -} - -InlineSizeEstimatorAnalysis::Result -InlineSizeEstimatorAnalysis::run(const Function &F, - FunctionAnalysisManager &FAM) { - if (!Evaluator) - return std::nullopt; - auto Features = IRToNativeSizeLearning::getFunctionFeatures( - const_cast<Function &>(F), FAM); - int32_t *V = Evaluator->getInput<int32_t>(0); - Features.fillTensor(V); - auto ER = Evaluator->evaluate(); - if (!ER) - return std::nullopt; - float Ret = *ER->getTensorValue<float>(0); - if (Ret < 0.0) - Ret = 0.0; - return static_cast<size_t>(Ret); -} - -InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {} -InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis( - InlineSizeEstimatorAnalysis &&Other) - : Evaluator(std::move(Other.Evaluator)) {} - -#else -namespace llvm { -class TFModelEvaluator {}; -} // namespace llvm -InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() = default; -InlineSizeEstimatorAnalysis ::InlineSizeEstimatorAnalysis( - InlineSizeEstimatorAnalysis &&) {} -InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() = default; -InlineSizeEstimatorAnalysis::Result -InlineSizeEstimatorAnalysis::run(const Function &F, - FunctionAnalysisManager &FAM) { - return std::nullopt; -} -bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { return false; } -#endif - -PreservedAnalyses -InlineSizeEstimatorAnalysisPrinterPass::run(Function &F, - FunctionAnalysisManager &AM) { - OS << "[InlineSizeEstimatorAnalysis] size estimate for " << F.getName() - << ": " << AM.getResult<InlineSizeEstimatorAnalysis>(F) << "\n"; - return PreservedAnalyses::all(); -} diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp index 74f3a7d..f97abc9 100644 --- a/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -15,33 +15,11 @@ #include "llvm/ADT/SmallString.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Module.h" +#include "llvm/IR/SystemLibraries.h" #include "llvm/InitializePasses.h" -#include "llvm/Support/CommandLine.h" #include "llvm/TargetParser/Triple.h" using namespace llvm; -static cl::opt<TargetLibraryInfoImpl::VectorLibrary> ClVectorLibrary( - "vector-library", cl::Hidden, cl::desc("Vector functions library"), - cl::init(TargetLibraryInfoImpl::NoLibrary), - cl::values(clEnumValN(TargetLibraryInfoImpl::NoLibrary, "none", - "No vector functions library"), - clEnumValN(TargetLibraryInfoImpl::Accelerate, "Accelerate", - "Accelerate framework"), - clEnumValN(TargetLibraryInfoImpl::DarwinLibSystemM, - "Darwin_libsystem_m", "Darwin libsystem_m"), - clEnumValN(TargetLibraryInfoImpl::LIBMVEC, "LIBMVEC", - "GLIBC Vector Math library"), - clEnumValN(TargetLibraryInfoImpl::MASSV, "MASSV", - "IBM MASS vector library"), - clEnumValN(TargetLibraryInfoImpl::SVML, "SVML", - "Intel SVML library"), - clEnumValN(TargetLibraryInfoImpl::SLEEFGNUABI, "sleefgnuabi", - "SIMD Library for Evaluating Elementary Functions"), - clEnumValN(TargetLibraryInfoImpl::ArmPL, "ArmPL", - "Arm Performance Libraries"), - clEnumValN(TargetLibraryInfoImpl::AMDLIBM, "AMDLIBM", - "AMD vector math library"))); - StringLiteral const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] = { #define TLI_DEFINE_STRING @@ -1392,15 +1370,15 @@ const VecDesc VecFuncs_AMDLIBM[] = { void 
TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib( enum VectorLibrary VecLib, const llvm::Triple &TargetTriple) { switch (VecLib) { - case Accelerate: { + case VectorLibrary::Accelerate: { addVectorizableFunctions(VecFuncs_Accelerate); break; } - case DarwinLibSystemM: { + case VectorLibrary::DarwinLibSystemM: { addVectorizableFunctions(VecFuncs_DarwinLibSystemM); break; } - case LIBMVEC: { + case VectorLibrary::LIBMVEC: { switch (TargetTriple.getArch()) { default: break; @@ -1415,15 +1393,15 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib( } break; } - case MASSV: { + case VectorLibrary::MASSV: { addVectorizableFunctions(VecFuncs_MASSV); break; } - case SVML: { + case VectorLibrary::SVML: { addVectorizableFunctions(VecFuncs_SVML); break; } - case SLEEFGNUABI: { + case VectorLibrary::SLEEFGNUABI: { switch (TargetTriple.getArch()) { default: break; @@ -1439,7 +1417,7 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib( } break; } - case ArmPL: { + case VectorLibrary::ArmPL: { switch (TargetTriple.getArch()) { default: break; @@ -1450,11 +1428,11 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib( } break; } - case AMDLIBM: { + case VectorLibrary::AMDLIBM: { addVectorizableFunctions(VecFuncs_AMDLIBM); break; } - case NoLibrary: + case VectorLibrary::NoLibrary: break; } } diff --git a/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 6567bd4..46b5bb7 100644 --- a/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -395,7 +395,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction( // Note register reference... const TargetRegisterClass *RC = nullptr; if (i < MI.getDesc().getNumOperands()) - RC = TII->getRegClass(MI.getDesc(), i, TRI); + RC = TII->getRegClass(MI.getDesc(), i); AggressiveAntiDepState::RegisterReference RR = { &MO, RC }; RegRefs.emplace(Reg.asMCReg(), RR); } @@ -479,7 +479,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI, // Note register reference... const TargetRegisterClass *RC = nullptr; if (i < MI.getDesc().getNumOperands()) - RC = TII->getRegClass(MI.getDesc(), i, TRI); + RC = TII->getRegClass(MI.getDesc(), i); AggressiveAntiDepState::RegisterReference RR = { &MO, RC }; RegRefs.emplace(Reg.asMCReg(), RR); } diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp index e317e1c..52e2909 100644 --- a/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -183,8 +183,7 @@ updateBranches(MachineFunction &MF, // clusters are ordered in increasing order of their IDs, with the "Exception" // and "Cold" succeeding all other clusters. // FuncClusterInfo represents the cluster information for basic blocks. It -// maps from BBID of basic blocks to their cluster information. If this is -// empty, it means unique sections for all basic blocks in the function. +// maps from BBID of basic blocks to their cluster information. static void assignSections(MachineFunction &MF, const DenseMap<UniqueBBID, BBClusterInfo> &FuncClusterInfo) { @@ -197,10 +196,8 @@ assignSections(MachineFunction &MF, for (auto &MBB : MF) { // With the 'all' option, every basic block is placed in a unique section. // With the 'list' option, every basic block is placed in a section - // associated with its cluster, unless we want individual unique sections - // for every basic block in this function (if FuncClusterInfo is empty). 
- if (MF.getTarget().getBBSectionsType() == llvm::BasicBlockSection::All || - FuncClusterInfo.empty()) { + // associated with its cluster. + if (MF.getTarget().getBBSectionsType() == llvm::BasicBlockSection::All) { // If unique sections are desired for all basic blocks of the function, we // set every basic block's section ID equal to its original position in // the layout (which is equal to its number). This ensures that basic @@ -308,22 +305,22 @@ bool BasicBlockSections::handleBBSections(MachineFunction &MF) { if (BBSectionsType == BasicBlockSection::List && hasInstrProfHashMismatch(MF)) return false; - // Renumber blocks before sorting them. This is useful for accessing the - // original layout positions and finding the original fallthroughs. - MF.RenumberBlocks(); DenseMap<UniqueBBID, BBClusterInfo> FuncClusterInfo; if (BBSectionsType == BasicBlockSection::List) { - auto [HasProfile, ClusterInfo] = - getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>() - .getClusterInfoForFunction(MF.getName()); - if (!HasProfile) + auto ClusterInfo = getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>() + .getClusterInfoForFunction(MF.getName()); + if (ClusterInfo.empty()) return false; for (auto &BBClusterInfo : ClusterInfo) { FuncClusterInfo.try_emplace(BBClusterInfo.BBID, BBClusterInfo); } } + // Renumber blocks before sorting them. This is useful for accessing the + // original layout positions and finding the original fallthroughs. + MF.RenumberBlocks(); + MF.setBBSectionsType(BBSectionsType); assignSections(MF, FuncClusterInfo); diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index 485b44ae..c234c0f 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -58,22 +58,24 @@ BasicBlockSectionsProfileReader::parseUniqueBBID(StringRef S) const { } bool BasicBlockSectionsProfileReader::isFunctionHot(StringRef FuncName) const { - return getClusterInfoForFunction(FuncName).first; + return !getClusterInfoForFunction(FuncName).empty(); } -std::pair<bool, SmallVector<BBClusterInfo>> +SmallVector<BBClusterInfo> BasicBlockSectionsProfileReader::getClusterInfoForFunction( StringRef FuncName) const { auto R = ProgramPathAndClusterInfo.find(getAliasName(FuncName)); - return R != ProgramPathAndClusterInfo.end() - ? std::pair(true, R->second.ClusterInfo) - : std::pair(false, SmallVector<BBClusterInfo>()); + return R != ProgramPathAndClusterInfo.end() ? R->second.ClusterInfo + : SmallVector<BBClusterInfo>(); } SmallVector<SmallVector<unsigned>> BasicBlockSectionsProfileReader::getClonePathsForFunction( StringRef FuncName) const { - return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).ClonePaths; + auto R = ProgramPathAndClusterInfo.find(getAliasName(FuncName)); + return R != ProgramPathAndClusterInfo.end() + ? 
R->second.ClonePaths + : SmallVector<SmallVector<unsigned>>(); } uint64_t BasicBlockSectionsProfileReader::getEdgeCount( @@ -494,7 +496,7 @@ bool BasicBlockSectionsProfileReaderWrapperPass::isFunctionHot( return BBSPR.isFunctionHot(FuncName); } -std::pair<bool, SmallVector<BBClusterInfo>> +SmallVector<BBClusterInfo> BasicBlockSectionsProfileReaderWrapperPass::getClusterInfoForFunction( StringRef FuncName) const { return BBSPR.getClusterInfoForFunction(FuncName); diff --git a/llvm/lib/CodeGen/BreakFalseDeps.cpp b/llvm/lib/CodeGen/BreakFalseDeps.cpp index 1846880..fead3ee 100644 --- a/llvm/lib/CodeGen/BreakFalseDeps.cpp +++ b/llvm/lib/CodeGen/BreakFalseDeps.cpp @@ -133,7 +133,7 @@ bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, } // Get the undef operand's register class - const TargetRegisterClass *OpRC = TII->getRegClass(MI->getDesc(), OpIdx, TRI); + const TargetRegisterClass *OpRC = TII->getRegClass(MI->getDesc(), OpIdx); assert(OpRC && "Not a valid register class"); // If the instruction has a true dependency, we can hide the false depdency diff --git a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp index 86377cf..3259a3e 100644 --- a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -187,7 +187,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) { const TargetRegisterClass *NewRC = nullptr; if (i < MI.getDesc().getNumOperands()) - NewRC = TII->getRegClass(MI.getDesc(), i, TRI); + NewRC = TII->getRegClass(MI.getDesc(), i); // For now, only allow the register to be changed if its register // class is consistent across all uses. @@ -316,7 +316,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) { const TargetRegisterClass *NewRC = nullptr; if (i < MI.getDesc().getNumOperands()) - NewRC = TII->getRegClass(MI.getDesc(), i, TRI); + NewRC = TII->getRegClass(MI.getDesc(), i); // For now, only allow the register to be changed if its register // class is consistent across all uses. diff --git a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp index 8b74dce..c23cac7 100644 --- a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp +++ b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp @@ -420,7 +420,7 @@ public: LLVM_DEBUG(dbgs() << "Insert spill before " << *InsertBefore); TII.storeRegToStackSlot(*MI.getParent(), InsertBefore, Reg, IsKill, FI, - RC, &TRI, Register()); + RC, Register()); } } @@ -429,7 +429,7 @@ public: const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg); int FI = RegToSlotIdx[Reg]; if (It != MBB->end()) { - TII.loadRegFromStackSlot(*MBB, It, Reg, FI, RC, &TRI, Register()); + TII.loadRegFromStackSlot(*MBB, It, Reg, FI, RC, Register()); return; } @@ -437,7 +437,7 @@ public: // and then swap them. assert(!MBB->empty() && "Empty block"); --It; - TII.loadRegFromStackSlot(*MBB, It, Reg, FI, RC, &TRI, Register()); + TII.loadRegFromStackSlot(*MBB, It, Reg, FI, RC, Register()); MachineInstr *Reload = It->getPrevNode(); int Dummy = 0; (void)Dummy; diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 5fab6ec..e8954a3 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -114,7 +114,7 @@ Register llvm::constrainOperandRegClass( // Assume physical registers are properly constrained. 
assert(Reg.isVirtual() && "PhysReg not implemented"); - const TargetRegisterClass *OpRC = TII.getRegClass(II, OpIdx, &TRI); + const TargetRegisterClass *OpRC = TII.getRegClass(II, OpIdx); // Some of the target independent instructions, like COPY, may not impose any // register class constraints on some of their operands: If it's a use, we can // skip constraining as the instruction defining the register would constrain diff --git a/llvm/lib/CodeGen/InitUndef.cpp b/llvm/lib/CodeGen/InitUndef.cpp index e07e598..12b36f5 100644 --- a/llvm/lib/CodeGen/InitUndef.cpp +++ b/llvm/lib/CodeGen/InitUndef.cpp @@ -232,7 +232,7 @@ bool InitUndef::processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB, MachineOperand &UseMO = MI.getOperand(UseOpIdx); if (UseMO.getReg() == MCRegister::NoRegister) { const TargetRegisterClass *RC = - TII->getRegClass(MI.getDesc(), UseOpIdx, TRI); + TII->getRegClass(MI.getDesc(), UseOpIdx); Register NewDest = MRI->createVirtualRegister(RC); // We don't have a way to update dead lanes, so keep track of the // new register so that we avoid querying it later. diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp index c3e0964..6837030 100644 --- a/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/llvm/lib/CodeGen/InlineSpiller.cpp @@ -473,7 +473,7 @@ bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI, MachineInstrSpan MIS(MII, MBB); // Insert spill without kill flag immediately after def. TII.storeRegToStackSlot(*MBB, MII, SrcReg, false, StackSlot, - MRI.getRegClass(SrcReg), &TRI, Register()); + MRI.getRegClass(SrcReg), Register()); LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MII); for (const MachineInstr &MI : make_range(MIS.begin(), MII)) getVDefInterval(MI, LIS); @@ -1119,7 +1119,7 @@ void InlineSpiller::insertReload(Register NewVReg, MachineInstrSpan MIS(MI, &MBB); TII.loadRegFromStackSlot(MBB, MI, NewVReg, StackSlot, - MRI.getRegClass(NewVReg), &TRI, Register()); + MRI.getRegClass(NewVReg), Register()); LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MI); @@ -1155,7 +1155,7 @@ void InlineSpiller::insertSpill(Register NewVReg, bool isKill, if (IsRealSpill) TII.storeRegToStackSlot(MBB, SpillBefore, NewVReg, isKill, StackSlot, - MRI.getRegClass(NewVReg), &TRI, Register()); + MRI.getRegClass(NewVReg), Register()); else // Don't spill undef value. // Anything works for undef, in particular keeping the memory @@ -1729,7 +1729,7 @@ void HoistSpillHelper::hoistAllSpills() { MachineBasicBlock::iterator MII = IPA.getLastInsertPointIter(OrigLI, *BB); MachineInstrSpan MIS(MII, BB); TII.storeRegToStackSlot(*BB, MII, LiveReg, false, Slot, - MRI.getRegClass(LiveReg), &TRI, Register()); + MRI.getRegClass(LiveReg), Register()); LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MII); for (const MachineInstr &MI : make_range(MIS.begin(), MII)) getVDefInterval(MI, LIS); diff --git a/llvm/lib/CodeGen/LiveRangeEdit.cpp b/llvm/lib/CodeGen/LiveRangeEdit.cpp index 5b0365d..6fe1170 100644 --- a/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -88,7 +88,7 @@ SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB, bool Late, unsigned SubIdx, MachineInstr *ReplaceIndexMI) { assert(RM.OrigMI && "Invalid remat"); - TII.reMaterialize(MBB, MI, DestReg, SubIdx, *RM.OrigMI, tri); + TII.reMaterialize(MBB, MI, DestReg, SubIdx, *RM.OrigMI); // DestReg of the cloned instruction cannot be Dead. Set isDead of DestReg // to false anyway in case the isDead flag of RM.OrigMI's dest register // is true. 
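The CodeGen changes above are dominated by one mechanical API migration: TargetInstrInfo::getRegClass, storeRegToStackSlot, loadRegFromStackSlot, and reMaterialize no longer take a TargetRegisterInfo pointer, which the instruction info can now reach through its subtarget. A minimal sketch of a downstream caller after the migration; the helper name and its parameters are illustrative, not part of the patch:

    // Hypothetical helper showing the post-patch signatures; the "before"
    // forms are kept in comments for contrast.
    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/TargetInstrInfo.h"
    using namespace llvm;

    static void spillOperand(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator InsertPt,
                             MachineInstr &MI, unsigned OpIdx, int FrameIndex) {
      // Before: TII->getRegClass(MI.getDesc(), OpIdx, TRI)
      const TargetRegisterClass *RC = TII->getRegClass(MI.getDesc(), OpIdx);
      Register Reg = MI.getOperand(OpIdx).getReg();
      // Before: TII->storeRegToStackSlot(MBB, InsertPt, Reg, /*isKill=*/true,
      //                                  FrameIndex, RC, TRI, Register())
      TII->storeRegToStackSlot(MBB, InsertPt, Reg, /*isKill=*/true, FrameIndex,
                               RC, Register());
    }

Callers that only forwarded a TRI pointer for these calls (as in BreakFalseDeps, InlineSpiller, and RegAllocFast above) can drop it outright.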
diff --git a/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp b/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp index a72c2c4..32b6c46 100644 --- a/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp +++ b/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp @@ -83,13 +83,6 @@ static cl::opt<std::string> ModelUnderTraining( "regalloc-model", cl::Hidden, cl::desc("The model being trained for register allocation eviction")); -static cl::opt<bool> EnableDevelopmentFeatures( - "regalloc-enable-development-features", cl::Hidden, - cl::desc("Whether or not to enable features under development for the ML " - "regalloc advisor")); - -#else -static const bool EnableDevelopmentFeatures = false; #endif // #ifdef LLVM_HAVE_TFLITE /// The score injection pass. @@ -212,23 +205,6 @@ static const std::vector<int64_t> PerLiveRangeShape{1, NumberOfInterferences}; "lowest stage of an interval in this LR") \ M(float, progress, {1}, "ratio of current queue size to initial size") -#ifdef LLVM_HAVE_TFLITE -#define RA_EVICT_FIRST_DEVELOPMENT_FEATURE(M) \ - M(int64_t, instructions, InstructionsShape, \ - "Opcodes of the instructions covered by the eviction problem") - -#define RA_EVICT_REST_DEVELOPMENT_FEATURES(M) \ - M(int64_t, instructions_mapping, InstructionsMappingShape, \ - "A binary matrix mapping LRs to instruction opcodes") \ - M(float, mbb_frequencies, MBBFrequencyShape, \ - "A vector of machine basic block frequencies") \ - M(int64_t, mbb_mapping, InstructionsShape, \ - "A vector of indices mapping instructions to MBBs") -#else -#define RA_EVICT_FIRST_DEVELOPMENT_FEATURE(M) -#define RA_EVICT_REST_DEVELOPMENT_FEATURES(M) -#endif - // The model learns to pick one of the mask == 1 interferences. This is the // name of the output tensor. The contract with the model is that the output // will be guaranteed to be to a mask == 1 position. Using a macro here to @@ -242,12 +218,6 @@ enum FeatureIDs { #define _FEATURE_IDX_SIMPLE(_, name, __, ___) name #define _FEATURE_IDX(A, B, C, D) _FEATURE_IDX_SIMPLE(A, B, C, D), RA_EVICT_FEATURES_LIST(_FEATURE_IDX) FeatureCount, -#ifdef LLVM_HAVE_TFLITE - RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_FEATURE_IDX_SIMPLE) = FeatureCount, -#else - RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_FEATURE_IDX) -#endif // #ifdef LLVM_HAVE_TFLITE - RA_EVICT_REST_DEVELOPMENT_FEATURES(_FEATURE_IDX) FeaturesWithDevelopmentCount #undef _FEATURE_IDX #undef _FEATURE_IDX_SIMPLE }; @@ -268,11 +238,7 @@ void resetInputs(MLModelRunner &Runner) { std::memset(Runner.getTensorUntyped(FeatureIDs::NAME), 0, \ getTotalSize<TYPE>(SHAPE)); RA_EVICT_FEATURES_LIST(_RESET) - if (EnableDevelopmentFeatures) { - RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_RESET) - RA_EVICT_REST_DEVELOPMENT_FEATURES(_RESET) #undef _RESET - } } // Per-live interval components that get aggregated into the feature values @@ -398,13 +364,7 @@ class ReleaseModeEvictionAdvisorProvider final public: ReleaseModeEvictionAdvisorProvider(LLVMContext &Ctx) : RegAllocEvictionAdvisorProvider(AdvisorMode::Release, Ctx) { - if (EnableDevelopmentFeatures) { - InputFeatures = {RA_EVICT_FEATURES_LIST( - _DECL_FEATURES) RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_DECL_FEATURES) - RA_EVICT_REST_DEVELOPMENT_FEATURES(_DECL_FEATURES)}; - } else { - InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)}; - } + InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)}; } // support for isa<> and dyn_cast. 
static bool classof(const RegAllocEvictionAdvisorProvider *R) { @@ -500,25 +460,12 @@ class DevelopmentModeEvictionAdvisorProvider final public: DevelopmentModeEvictionAdvisorProvider(LLVMContext &Ctx) : RegAllocEvictionAdvisorProvider(AdvisorMode::Development, Ctx) { - if (EnableDevelopmentFeatures) { - InputFeatures = {RA_EVICT_FEATURES_LIST( - _DECL_FEATURES) RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_DECL_FEATURES) - RA_EVICT_REST_DEVELOPMENT_FEATURES(_DECL_FEATURES)}; - TrainingInputFeatures = { - RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES) - RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_DECL_TRAIN_FEATURES) - RA_EVICT_REST_DEVELOPMENT_FEATURES(_DECL_TRAIN_FEATURES) - TensorSpec::createSpec<float>("action_discount", {1}), - TensorSpec::createSpec<int32_t>("action_step_type", {1}), - TensorSpec::createSpec<float>("action_reward", {1})}; - } else { - InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)}; - TrainingInputFeatures = { - RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES) - TensorSpec::createSpec<float>("action_discount", {1}), - TensorSpec::createSpec<int32_t>("action_step_type", {1}), - TensorSpec::createSpec<float>("action_reward", {1})}; - } + InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)}; + TrainingInputFeatures = { + RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES) + TensorSpec::createSpec<float>("action_discount", {1}), + TensorSpec::createSpec<int32_t>("action_step_type", {1}), + TensorSpec::createSpec<float>("action_reward", {1})}; if (ModelUnderTraining.empty() && TrainingLog.empty()) { Ctx.emitError("Regalloc development mode should be requested with at " "least logging enabled and/or a training model"); @@ -814,34 +761,6 @@ MCRegister MLEvictAdvisor::tryFindEvictionCandidate( /*NumUrgent*/ 0.0, LRPosInfo); assert(InitialQSize > 0.0 && "We couldn't have gotten here if we had " "nothing to allocate initially."); -#ifdef LLVM_HAVE_TFLITE - if (EnableDevelopmentFeatures) { - extractInstructionFeatures( - LRPosInfo, Runner, - [this](SlotIndex InputIndex) -> int { - auto *CurrentMachineInstruction = - LIS->getInstructionFromIndex(InputIndex); - if (!CurrentMachineInstruction) { - return -1; - } - return CurrentMachineInstruction->getOpcode(); - }, - [this](SlotIndex InputIndex) -> float { - auto *CurrentMachineInstruction = - LIS->getInstructionFromIndex(InputIndex); - return MBFI.getBlockFreqRelativeToEntryBlock( - CurrentMachineInstruction->getParent()); - }, - [this](SlotIndex InputIndex) -> MachineBasicBlock * { - auto *CurrentMachineInstruction = - LIS->getInstructionFromIndex(InputIndex); - return CurrentMachineInstruction->getParent(); - }, - FeatureIDs::instructions, FeatureIDs::instructions_mapping, - FeatureIDs::mbb_frequencies, FeatureIDs::mbb_mapping, - LIS->getSlotIndexes()->getLastIndex()); - } -#endif // #ifdef LLVM_HAVE_TFLITE // Normalize the features. for (auto &V : Largest) V = V ? V : 1.0; @@ -987,13 +906,6 @@ void MLEvictAdvisor::extractFeatures( HintWeights += LIFC.HintWeights; NumRematerializable += LIFC.IsRemat; - - if (EnableDevelopmentFeatures) { - for (auto CurrentSegment : LI) { - LRPosInfo.push_back( - LRStartEndInfo{CurrentSegment.start, CurrentSegment.end, Pos}); - } - } } size_t Size = 0; if (!Intervals.empty()) { @@ -1209,9 +1121,7 @@ int64_t DevelopmentModeEvictAdvisor::tryFindEvictionCandidatePosition( Log->startObservation(); size_t CurrentFeature = 0; - size_t FeatureCount = EnableDevelopmentFeatures - ? 
FeatureIDs::FeaturesWithDevelopmentCount - : FeatureIDs::FeatureCount; + size_t FeatureCount = FeatureIDs::FeatureCount; for (; CurrentFeature < FeatureCount; ++CurrentFeature) { Log->logTensorValue(CurrentFeature, reinterpret_cast<const char *>( diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index 37e5c51..eb46124 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -978,7 +978,7 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx, assert(getMF() && "Can't have an MF reference here!"); // Most opcodes have fixed constraints in their MCInstrDesc. if (!isInlineAsm()) - return TII->getRegClass(getDesc(), OpIdx, TRI); + return TII->getRegClass(getDesc(), OpIdx); if (!getOperand(OpIdx).isReg()) return nullptr; diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp index 729e73c..c169467 100644 --- a/llvm/lib/CodeGen/MachineLICM.cpp +++ b/llvm/lib/CodeGen/MachineLICM.cpp @@ -1399,7 +1399,7 @@ MachineInstr *MachineLICMImpl::ExtractHoistableLoad(MachineInstr *MI, if (NewOpc == 0) return nullptr; const MCInstrDesc &MID = TII->get(NewOpc); MachineFunction &MF = *MI->getMF(); - const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI); + const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex); // Ok, we're unfolding. Create a temporary register and do the unfold. Register Reg = MRI->createVirtualRegister(RC); diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index 94ed82e..0ceeda4 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -569,7 +569,7 @@ bool MachineSinking::PerformSinkAndFold(MachineInstr &MI, // Sink a copy of the instruction, replacing a COPY instruction. MachineBasicBlock::iterator InsertPt = SinkDst->getIterator(); Register DstReg = SinkDst->getOperand(0).getReg(); - TII->reMaterialize(*SinkDst->getParent(), InsertPt, DstReg, 0, MI, *TRI); + TII->reMaterialize(*SinkDst->getParent(), InsertPt, DstReg, 0, MI); New = &*std::prev(InsertPt); if (!New->getDebugLoc()) New->setDebugLoc(SinkDst->getDebugLoc()); diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index fdf1048..013f529 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -2657,8 +2657,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { return; } if (MONum < MCID.getNumOperands()) { - if (const TargetRegisterClass *DRC = - TII->getRegClass(MCID, MONum, TRI)) { + if (const TargetRegisterClass *DRC = TII->getRegClass(MCID, MONum)) { if (!DRC->contains(Reg)) { report("Illegal physical register for instruction", MO, MONum); OS << printReg(Reg, TRI) << " is not a " @@ -2742,12 +2741,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { // has register class constraint, the virtual register must // comply to it. 
if (!isPreISelGenericOpcode(MCID.getOpcode()) && - MONum < MCID.getNumOperands() && - TII->getRegClass(MCID, MONum, TRI)) { + MONum < MCID.getNumOperands() && TII->getRegClass(MCID, MONum)) { report("Virtual register does not match instruction constraint", MO, MONum); OS << "Expect register class " - << TRI->getRegClassName(TII->getRegClass(MCID, MONum, TRI)) + << TRI->getRegClassName(TII->getRegClass(MCID, MONum)) << " but got nothing\n"; return; } @@ -2773,8 +2771,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } } if (MONum < MCID.getNumOperands()) { - if (const TargetRegisterClass *DRC = - TII->getRegClass(MCID, MONum, TRI)) { + if (const TargetRegisterClass *DRC = TII->getRegClass(MCID, MONum)) { if (SubIdx) { const TargetRegisterClass *SuperRC = TRI->getLargestLegalSuperClass(RC, *MF); diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp index ec6ffd4..9097728 100644 --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -594,8 +594,7 @@ void RegAllocFastImpl::spill(MachineBasicBlock::iterator Before, LLVM_DEBUG(dbgs() << " to stack slot #" << FI << '\n'); const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); - TII->storeRegToStackSlot(*MBB, Before, AssignedReg, Kill, FI, &RC, TRI, - VirtReg); + TII->storeRegToStackSlot(*MBB, Before, AssignedReg, Kill, FI, &RC, VirtReg); ++NumStores; MachineBasicBlock::iterator FirstTerm = MBB->getFirstTerminator(); @@ -652,7 +651,7 @@ void RegAllocFastImpl::reload(MachineBasicBlock::iterator Before, << printReg(PhysReg, TRI) << '\n'); int FI = getStackSpaceFor(VirtReg); const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); - TII->loadRegFromStackSlot(*MBB, Before, PhysReg, FI, &RC, TRI, VirtReg); + TII->loadRegFromStackSlot(*MBB, Before, PhysReg, FI, &RC, VirtReg); ++NumLoads; } @@ -1123,7 +1122,7 @@ bool RegAllocFastImpl::defineVirtReg(MachineInstr &MI, unsigned OpNum, if (MO.isMBB()) { MachineBasicBlock *Succ = MO.getMBB(); TII->storeRegToStackSlot(*Succ, Succ->begin(), PhysReg, Kill, FI, - &RC, TRI, VirtReg); + &RC, VirtReg); ++NumStores; Succ->addLiveIn(PhysReg); } diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index 99f7693..005e44f 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -79,9 +79,9 @@ static cl::opt<bool> EnableJoining("join-liveintervals", cl::desc("Coalesce copies (default=true)"), cl::init(true), cl::Hidden); -static cl::opt<bool> UseTerminalRule("terminal-rule", - cl::desc("Apply the terminal rule"), - cl::init(false), cl::Hidden); +static cl::opt<cl::boolOrDefault> + EnableTerminalRule("terminal-rule", cl::desc("Apply the terminal rule"), + cl::init(cl::BOU_UNSET), cl::Hidden); /// Temporary flag to test critical edge unsplitting. static cl::opt<bool> EnableJoinSplits( @@ -134,6 +134,7 @@ class RegisterCoalescer : private LiveRangeEdit::Delegate { SlotIndexes *SI = nullptr; const MachineLoopInfo *Loops = nullptr; RegisterClassInfo RegClassInfo; + bool UseTerminalRule = false; /// Position and VReg of a PHI instruction during coalescing. 
struct PHIValPos { @@ -1373,7 +1374,7 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP, } const unsigned DefSubIdx = DefMI->getOperand(0).getSubReg(); - const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI); + const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0); if (!DefMI->isImplicitDef()) { if (DstReg.isPhysical()) { Register NewDstReg = DstReg; @@ -4320,6 +4321,11 @@ bool RegisterCoalescer::run(MachineFunction &fn) { else JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE); + if (EnableTerminalRule == cl::BOU_UNSET) + UseTerminalRule = STI.enableTerminalRule(); + else + UseTerminalRule = EnableTerminalRule == cl::BOU_TRUE; + // If there are PHIs tracked by debug-info, they will need updating during // coalescing. Build an index of those PHIs to ease updating. SlotIndexes *Slots = LIS->getSlotIndexes(); diff --git a/llvm/lib/CodeGen/RegisterScavenging.cpp b/llvm/lib/CodeGen/RegisterScavenging.cpp index 7e26c2e..d886167 100644 --- a/llvm/lib/CodeGen/RegisterScavenging.cpp +++ b/llvm/lib/CodeGen/RegisterScavenging.cpp @@ -276,14 +276,14 @@ RegScavenger::spill(Register Reg, const TargetRegisterClass &RC, int SPAdj, ": Cannot scavenge register without an emergency " "spill slot!"); } - TII->storeRegToStackSlot(*MBB, Before, Reg, true, FI, &RC, TRI, Register()); + TII->storeRegToStackSlot(*MBB, Before, Reg, true, FI, &RC, Register()); MachineBasicBlock::iterator II = std::prev(Before); unsigned FIOperandNum = getFrameIndexOperandNum(*II); TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this); // Restore the scavenged register before its use (or first terminator). - TII->loadRegFromStackSlot(*MBB, UseMI, Reg, FI, &RC, TRI, Register()); + TII->loadRegFromStackSlot(*MBB, UseMI, Reg, FI, &RC, Register()); II = std::prev(UseMI); FIOperandNum = getFrameIndexOperandNum(*II); diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index f144f17..df353c4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -10988,6 +10988,22 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { } } + // fold (sra (xor (sra x, c1), -1), c2) -> (xor (sra x, c3), -1) + // This allows merging two arithmetic shifts even when there's a NOT in + // between. + SDValue X; + APInt C1; + if (N1C && sd_match(N0, m_OneUse(m_Not( + m_OneUse(m_Sra(m_Value(X), m_ConstInt(C1))))))) { + APInt C2 = N1C->getAPIntValue(); + zeroExtendToMatch(C1, C2, 1 /* Overflow Bit */); + APInt Sum = C1 + C2; + unsigned ShiftSum = Sum.getLimitedValue(OpSizeInBits - 1); + SDValue NewShift = DAG.getNode( + ISD::SRA, DL, VT, X, DAG.getShiftAmountConstant(ShiftSum, VT, DL)); + return DAG.getNOT(DL, NewShift, VT); + } + // fold (sra (shl X, m), (sub result_size, n)) // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for // result_size - n != m. @@ -18863,6 +18879,26 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); + if (VT != N1.getValueType()) + return SDValue(); + + // If this is equivalent to a disjoint or, replace it with one. This can + // happen if the sign operand is a sign mask (i.e., x << sign_bit_position). + if (DAG.SignBitIsZeroFP(N0) && + DAG.computeKnownBits(N1).Zero.isMaxSignedValue()) { + // TODO: Just directly match the shift pattern. computeKnownBits is heavy + // for such a narrowly targeted case.
+ EVT IntVT = VT.changeTypeToInteger(); + // TODO: It appears to be profitable in some situations to unconditionally + // emit a fabs(n0) to perform this combine. + SDValue CastSrc0 = DAG.getNode(ISD::BITCAST, DL, IntVT, N0); + SDValue CastSrc1 = DAG.getNode(ISD::BITCAST, DL, IntVT, N1); + + SDValue SignOr = DAG.getNode(ISD::OR, DL, IntVT, CastSrc0, CastSrc1, + SDNodeFlags::Disjoint); + return DAG.getNode(ISD::BITCAST, DL, VT, SignOr); + } + return SDValue(); } diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 507b2d6..5c84059 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1965,7 +1965,7 @@ Register FastISel::createResultReg(const TargetRegisterClass *RC) { Register FastISel::constrainOperandRegClass(const MCInstrDesc &II, Register Op, unsigned OpNum) { if (Op.isVirtual()) { - const TargetRegisterClass *RegClass = TII.getRegClass(II, OpNum, &TRI); + const TargetRegisterClass *RegClass = TII.getRegClass(II, OpNum); if (!MRI.constrainRegClass(Op, RegClass)) { // If it's not legal to COPY between the register classes, something // has gone very wrong before we got here. diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index d84c3fb..72d0c44 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -125,7 +125,7 @@ void InstrEmitter::EmitCopyFromReg(SDValue Op, bool IsClone, Register SrcReg, const TargetRegisterClass *RC = nullptr; if (i + II.getNumDefs() < II.getNumOperands()) { RC = TRI->getAllocatableClass( - TII->getRegClass(II, i + II.getNumDefs(), TRI)); + TII->getRegClass(II, i + II.getNumDefs())); } if (!UseRC) UseRC = RC; @@ -197,7 +197,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, // register instead of creating a new vreg. Register VRBase; const TargetRegisterClass *RC = - TRI->getAllocatableClass(TII->getRegClass(II, i, TRI)); + TRI->getAllocatableClass(TII->getRegClass(II, i)); // Always let the value type influence the used register class. The // constraints on the instruction may be too lax to represent the value // type correctly. For example, a 64-bit float (X86::FR64) can't live in @@ -330,7 +330,7 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, if (II) { const TargetRegisterClass *OpRC = nullptr; if (IIOpNum < II->getNumOperands()) - OpRC = TII->getRegClass(*II, IIOpNum, TRI); + OpRC = TII->getRegClass(*II, IIOpNum); if (OpRC) { unsigned MinNumRegs = MinRCSize; @@ -409,8 +409,7 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, SDValue Op, Register VReg = R->getReg(); MVT OpVT = Op.getSimpleValueType(); const TargetRegisterClass *IIRC = - II ? TRI->getAllocatableClass(TII->getRegClass(*II, IIOpNum, TRI)) - : nullptr; + II ? TRI->getAllocatableClass(TII->getRegClass(*II, IIOpNum)) : nullptr; const TargetRegisterClass *OpRC = TLI->isTypeLegal(OpVT) ? TLI->getRegClassFor(OpVT, diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 316aacd..a0baf82 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -4842,9 +4842,15 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::Libcall LC = Node->getOpcode() == ISD::FSINCOS ? 
RTLIB::getSINCOS(VT) : RTLIB::getSINCOSPI(VT); - bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results); - if (!Expanded) - llvm_unreachable("Expected scalar FSINCOS[PI] to expand to libcall!"); + bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT); + if (!Expanded) { + DAG.getContext()->emitError(Twine("no libcall available for ") + + Node->getOperationName(&DAG)); + SDValue Poison = DAG.getPOISON(VT); + Results.push_back(Poison); + Results.push_back(Poison); + } + break; } case ISD::FLOG: @@ -4934,7 +4940,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { EVT VT = Node->getValueType(0); RTLIB::Libcall LC = Node->getOpcode() == ISD::FMODF ? RTLIB::getMODF(VT) : RTLIB::getFREXP(VT); - bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results, + bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT, /*CallRetResNo=*/0); if (!Expanded) llvm_unreachable("Expected scalar FFREXP/FMODF to expand to libcall!"); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 58983cb..29c4dac 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -1726,7 +1726,8 @@ void DAGTypeLegalizer::ExpandFloatRes_UnaryWithTwoFPResults( SDNode *N, RTLIB::Libcall LC, std::optional<unsigned> CallRetResNo) { assert(!N->isStrictFPOpcode() && "strictfp not implemented"); SmallVector<SDValue> Results; - DAG.expandMultipleResultFPLibCall(LC, N, Results, CallRetResNo); + DAG.expandMultipleResultFPLibCall(LC, N, Results, N->getValueType(0), + CallRetResNo); for (auto [ResNo, Res] : enumerate(Results)) { SDValue Lo, Hi; GetPairElements(Res, Lo, Hi); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 94751be5..f5a54497 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -1268,20 +1268,30 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { return; break; - case ISD::FSINCOS: + case ISD::FSINCOSPI: { + EVT VT = Node->getValueType(0); + RTLIB::Libcall LC = RTLIB::getSINCOSPI(VT); + if (LC != RTLIB::UNKNOWN_LIBCALL && + DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT)) + return; + + // TODO: Try to see if there's a narrower call available to use before + // scalarizing. + break; + } + case ISD::FSINCOS: { + // FIXME: Try to directly match vector case like fsincospi EVT VT = Node->getValueType(0).getVectorElementType(); - RTLIB::Libcall LC = Node->getOpcode() == ISD::FSINCOS - ? 
RTLIB::getSINCOS(VT) - : RTLIB::getSINCOSPI(VT); - if (DAG.expandMultipleResultFPLibCall(LC, Node, Results)) + RTLIB::Libcall LC = RTLIB::getSINCOS(VT); + if (DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT)) return; break; } case ISD::FMODF: { - RTLIB::Libcall LC = - RTLIB::getMODF(Node->getValueType(0).getVectorElementType()); - if (DAG.expandMultipleResultFPLibCall(LC, Node, Results, + EVT VT = Node->getValueType(0).getVectorElementType(); + RTLIB::Libcall LC = RTLIB::getMODF(VT); + if (DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT, /*CallRetResNo=*/0)) return; break; diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index f70b6cd..12fc26d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -340,7 +340,7 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, unsigned Idx = RegDefPos.GetIdx(); const MCInstrDesc &Desc = TII->get(Opcode); - const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI); + const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx); assert(RC && "Not a valid register class"); RegClass = RC->getID(); // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 80bbfea..b5d502b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2514,18 +2514,20 @@ static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode, bool SelectionDAG::expandMultipleResultFPLibCall( RTLIB::Libcall LC, SDNode *Node, SmallVectorImpl<SDValue> &Results, - std::optional<unsigned> CallRetResNo) { - LLVMContext &Ctx = *getContext(); - EVT VT = Node->getValueType(0); - unsigned NumResults = Node->getNumValues(); - + EVT CallVT, std::optional<unsigned> CallRetResNo) { if (LC == RTLIB::UNKNOWN_LIBCALL) return false; - const char *LCName = TLI->getLibcallName(LC); - if (!LCName) + EVT VT = Node->getValueType(0); + + RTLIB::LibcallImpl Impl = TLI->getLibcallImpl(LC); + if (Impl == RTLIB::Unsupported) return false; + StringRef LCName = TLI->getLibcallImplName(Impl); + + // FIXME: This should not use TargetLibraryInfo. There should be + // RTLIB::Libcall entries for each used vector type, and directly matched. auto getVecDesc = [&]() -> VecDesc const * { for (bool Masked : {false, true}) { if (VecDesc const *VD = getLibInfo().getVectorMappingInfo( @@ -2538,9 +2540,34 @@ bool SelectionDAG::expandMultipleResultFPLibCall( // For vector types, we must find a vector mapping for the libcall. VecDesc const *VD = nullptr; - if (VT.isVector() && !(VD = getVecDesc())) + if (VT.isVector() && !CallVT.isVector() && !(VD = getVecDesc())) return false; + bool IsMasked = (VD && VD->isMasked()) || + RTLIB::RuntimeLibcallsInfo::hasVectorMaskArgument(Impl); + + // This wrapper function exists because getVectorMappingInfo works in terms of + // function names instead of RTLIB enums. + + // FIXME: If we used a vector mapping, this assumes the calling convention of + // the vector function is the same as the scalar. + + StringRef Name = VD ? 
VD->getVectorFnName() : LCName; + + return expandMultipleResultFPLibCall(Name, + TLI->getLibcallImplCallingConv(Impl), + Node, Results, CallRetResNo, IsMasked); +} + +// FIXME: This belongs in TargetLowering +bool SelectionDAG::expandMultipleResultFPLibCall( + StringRef Name, CallingConv::ID CC, SDNode *Node, + SmallVectorImpl<SDValue> &Results, std::optional<unsigned> CallRetResNo, + bool IsMasked) { + LLVMContext &Ctx = *getContext(); + EVT VT = Node->getValueType(0); + unsigned NumResults = Node->getNumValues(); + // Find users of the node that store the results (and share input chains). The // destination pointers can be used instead of creating stack allocations. SDValue StoresInChain; @@ -2598,7 +2625,7 @@ bool SelectionDAG::expandMultipleResultFPLibCall( SDLoc DL(Node); // Pass the vector mask (if required). - if (VD && VD->isMasked()) { + if (IsMasked) { EVT MaskVT = TLI->getSetCCResultType(getDataLayout(), Ctx, VT); SDValue Mask = getBoolConstant(true, DL, MaskVT, VT); Args.emplace_back(Mask, MaskVT.getTypeForEVT(Ctx)); @@ -2608,11 +2635,11 @@ bool SelectionDAG::expandMultipleResultFPLibCall( ? Node->getValueType(*CallRetResNo).getTypeForEVT(Ctx) : Type::getVoidTy(Ctx); SDValue InChain = StoresInChain ? StoresInChain : getEntryNode(); - SDValue Callee = getExternalSymbol(VD ? VD->getVectorFnName().data() : LCName, - TLI->getPointerTy(getDataLayout())); + SDValue Callee = + getExternalSymbol(Name.data(), TLI->getPointerTy(getDataLayout())); TargetLowering::CallLoweringInfo CLI(*this); - CLI.setDebugLoc(DL).setChain(InChain).setLibCallee( - TLI->getLibcallCallingConv(LC), RetType, Callee, std::move(Args)); + CLI.setDebugLoc(DL).setChain(InChain).setLibCallee(CC, RetType, Callee, + std::move(Args)); auto [Call, CallChain] = TLI->LowerCallTo(CLI); @@ -2920,6 +2947,34 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { return MaskedValueIsZero(Op, APInt::getSignMask(BitWidth), Depth); } +bool SelectionDAG::SignBitIsZeroFP(SDValue Op, unsigned Depth) const { + if (Depth >= MaxRecursionDepth) + return false; // Limit search depth. + + unsigned Opc = Op.getOpcode(); + switch (Opc) { + case ISD::FABS: + return true; + case ISD::AssertNoFPClass: { + FPClassTest NoFPClass = + static_cast<FPClassTest>(Op.getConstantOperandVal(1)); + + const FPClassTest TestMask = fcNan | fcNegative; + return (NoFPClass & TestMask) == TestMask; + } + case ISD::ARITH_FENCE: + return SignBitIsZeroFP(Op, Depth + 1); + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FEXP10: + return Op->getFlags().hasNoNaNs(); + default: + return false; + } + + llvm_unreachable("covered opcode switch"); +} + /// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use /// this predicate to simplify operations downstream. Mask is known to be zero /// for bits that V cannot have. @@ -4121,6 +4176,25 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known.One.clearLowBits(LogOfAlign); break; } + case ISD::AssertNoFPClass: { + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + + FPClassTest NoFPClass = + static_cast<FPClassTest>(Op.getConstantOperandVal(1)); + const FPClassTest NegativeTestMask = fcNan | fcNegative; + if ((NoFPClass & NegativeTestMask) == NegativeTestMask) { + // Cannot be negative. + Known.makeNonNegative(); + } + + const FPClassTest PositiveTestMask = fcNan | fcPositive; + if ((NoFPClass & PositiveTestMask) == PositiveTestMask) { + // Cannot be positive. 
+ Known.makeNegative(); + } + + break; + } case ISD::FGETSIGN: // All bits are zero except the low bit. Known.Zero.setBitsFrom(1); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 88b0809..6a9022d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4638,6 +4638,12 @@ static std::optional<ConstantRange> getRange(const Instruction &I) { return std::nullopt; } +static FPClassTest getNoFPClass(const Instruction &I) { + if (const auto *CB = dyn_cast<CallBase>(&I)) + return CB->getRetNoFPClass(); + return fcNone; +} + void SelectionDAGBuilder::visitLoad(const LoadInst &I) { if (I.isAtomic()) return visitAtomicLoad(I); @@ -9132,6 +9138,7 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee, if (Result.first.getNode()) { Result.first = lowerRangeToAssertZExt(DAG, CB, Result.first); + Result.first = lowerNoFPClassToAssertNoFPClass(DAG, CB, Result.first); setValue(&CB, Result.first); } @@ -10718,6 +10725,16 @@ SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG, return DAG.getMergeValues(Ops, SL); } +SDValue SelectionDAGBuilder::lowerNoFPClassToAssertNoFPClass( + SelectionDAG &DAG, const Instruction &I, SDValue Op) { + FPClassTest Classes = getNoFPClass(I); + if (Classes == fcNone) + return Op; + + return DAG.getNode(ISD::AssertNoFPClass, SDLoc(Op), Op.getValueType(), Op, + DAG.getTargetConstant(Classes, SDLoc(), MVT::i32)); +} + /// Populate a CallLowerinInfo (into \p CLI) based on the properties of /// the call being lowered. /// diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index ed63bee..13e2daa 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -429,6 +429,10 @@ public: SDValue lowerRangeToAssertZExt(SelectionDAG &DAG, const Instruction &I, SDValue Op); + // Lower nofpclass attributes to AssertNoFPClass + SDValue lowerNoFPClassToAssertNoFPClass(SelectionDAG &DAG, + const Instruction &I, SDValue Op); + void populateCallLoweringInfo(TargetLowering::CallLoweringInfo &CLI, const CallBase *Call, unsigned ArgIdx, unsigned NumArgs, SDValue Callee, diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 8bc5d2f..e78dfb1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -2448,7 +2448,7 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, // a cycle in the scheduling graph. // If the node has glue, walk down the graph to the "lowest" node in the - // glueged set. + // glued set. EVT VT = Root->getValueType(Root->getNumValues()-1); while (VT == MVT::Glue) { SDNode *GU = Root->getGluedUser(); diff --git a/llvm/lib/CodeGen/SplitKit.cpp b/llvm/lib/CodeGen/SplitKit.cpp index f9ecb2c..8ec4bfb 100644 --- a/llvm/lib/CodeGen/SplitKit.cpp +++ b/llvm/lib/CodeGen/SplitKit.cpp @@ -1509,10 +1509,9 @@ void SplitEditor::forceRecomputeVNI(const VNInfo &ParentVNI) { } // Trace value through phis. - SmallPtrSet<const VNInfo *, 8> Visited; ///< whether VNI was/is in worklist. - SmallVector<const VNInfo *, 4> WorkList; - Visited.insert(&ParentVNI); - WorkList.push_back(&ParentVNI); + ///< whether VNI was/is in worklist. 
+ SmallPtrSet<const VNInfo *, 8> Visited = {&ParentVNI}; + SmallVector<const VNInfo *, 4> WorkList = {&ParentVNI}; const LiveInterval &ParentLI = Edit->getParent(); const SlotIndexes &Indexes = *LIS.getSlotIndexes(); diff --git a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp index 70c3b2c..ebf6d1a 100644 --- a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -198,7 +198,7 @@ void TargetFrameLowering::spillCalleeSavedRegister( } else { const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); TII->storeRegToStackSlot(SaveBlock, MI, Reg, true, CS.getFrameIdx(), RC, - TRI, Register()); + Register()); } } @@ -212,8 +212,7 @@ void TargetFrameLowering::restoreCalleeSavedRegister( .addReg(CS.getDstReg(), getKillRegState(true)); } else { const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII->loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI, - Register()); + TII->loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, Register()); assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!"); } } diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp index 3c41bbe..d503d7a 100644 --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -58,9 +58,8 @@ static cl::opt<unsigned int> MaxAccumulatorWidth( TargetInstrInfo::~TargetInstrInfo() = default; -const TargetRegisterClass * -TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum, - const TargetRegisterInfo *TRI) const { +const TargetRegisterClass *TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, + unsigned OpNum) const { if (OpNum >= MCID.getNumOperands()) return nullptr; @@ -69,14 +68,14 @@ TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum, // TODO: Remove isLookupPtrRegClass in favor of isLookupRegClassByHwMode if (OpInfo.isLookupPtrRegClass()) - return TRI->getPointerRegClass(RegClass); + return TRI.getPointerRegClass(RegClass); // Instructions like INSERT_SUBREG do not have fixed register classes. if (RegClass < 0) return nullptr; // Otherwise just look it up normally. - return TRI->getRegClass(RegClass); + return TRI.getRegClass(RegClass); } /// insertNoop - Insert a noop into the instruction stream at the specified @@ -223,13 +222,11 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI, // %1.sub = INST %1.sub(tied), %0.sub, implicit-def %1 SmallVector<unsigned> UpdateImplicitDefIdx; if (HasDef && MI.hasImplicitDef()) { - const TargetRegisterInfo *TRI = - MI.getMF()->getSubtarget().getRegisterInfo(); for (auto [OpNo, MO] : llvm::enumerate(MI.implicit_operands())) { Register ImplReg = MO.getReg(); if ((ImplReg.isVirtual() && ImplReg == Reg0) || (ImplReg.isPhysical() && Reg0.isPhysical() && - TRI->isSubRegisterEq(ImplReg, Reg0))) + TRI.isSubRegisterEq(ImplReg, Reg0))) UpdateImplicitDefIdx.push_back(OpNo + MI.getNumExplicitOperands()); } } @@ -425,28 +422,27 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC, unsigned SubIdx, unsigned &Size, unsigned &Offset, const MachineFunction &MF) const { - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!SubIdx) { - Size = TRI->getSpillSize(*RC); + Size = TRI.getSpillSize(*RC); Offset = 0; return true; } - unsigned BitSize = TRI->getSubRegIdxSize(SubIdx); + unsigned BitSize = TRI.getSubRegIdxSize(SubIdx); // Convert bit size to byte size. 
if (BitSize % 8) return false; - int BitOffset = TRI->getSubRegIdxOffset(SubIdx); + int BitOffset = TRI.getSubRegIdxOffset(SubIdx); if (BitOffset < 0 || BitOffset % 8) return false; Size = BitSize / 8; Offset = (unsigned)BitOffset / 8; - assert(TRI->getSpillSize(*RC) >= (Offset + Size) && "bad subregister range"); + assert(TRI.getSpillSize(*RC) >= (Offset + Size) && "bad subregister range"); if (!MF.getDataLayout().isLittleEndian()) { - Offset = TRI->getSpillSize(*RC) - (Offset + Size); + Offset = TRI.getSpillSize(*RC) - (Offset + Size); } return true; } @@ -454,8 +450,7 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC, void TargetInstrInfo::reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg, unsigned SubIdx, - const MachineInstr &Orig, - const TargetRegisterInfo &TRI) const { + const MachineInstr &Orig) const { MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig); MI->substituteRegister(MI->getOperand(0).getReg(), DestReg, SubIdx, TRI); MBB.insert(I, MI); @@ -726,7 +721,6 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, // actual load size is. int64_t MemSize = 0; const MachineFrameInfo &MFI = MF.getFrameInfo(); - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (Flags & MachineMemOperand::MOStore) { MemSize = MFI.getObjectSize(FI); @@ -735,7 +729,7 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, int64_t OpSize = MFI.getObjectSize(FI); if (auto SubReg = MI.getOperand(OpIdx).getSubReg()) { - unsigned SubRegSize = TRI->getSubRegIdxSize(SubReg); + unsigned SubRegSize = TRI.getSubRegIdxSize(SubReg); if (SubRegSize > 0 && !(SubRegSize % 8)) OpSize = SubRegSize / 8; } @@ -800,11 +794,11 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, // code. BuildMI(*MBB, Pos, MI.getDebugLoc(), get(TargetOpcode::KILL)).add(MO); } else { - storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI, + storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, Register()); } } else - loadRegFromStackSlot(*MBB, Pos, MO.getReg(), FI, RC, TRI, Register()); + loadRegFromStackSlot(*MBB, Pos, MO.getReg(), FI, RC, Register()); return &*--Pos; } @@ -880,8 +874,8 @@ static void transferImplicitOperands(MachineInstr *MI, } } -void TargetInstrInfo::lowerCopy(MachineInstr *MI, - const TargetRegisterInfo *TRI) const { +void TargetInstrInfo::lowerCopy( + MachineInstr *MI, const TargetRegisterInfo * /*Remove me*/) const { if (MI->allDefsAreDead()) { MI->setDesc(get(TargetOpcode::KILL)); return; @@ -911,7 +905,7 @@ void TargetInstrInfo::lowerCopy(MachineInstr *MI, SrcMO.getReg().isPhysical() ? 
SrcMO.isRenamable() : false); if (MI->getNumOperands() > 2) - transferImplicitOperands(MI, TRI); + transferImplicitOperands(MI, &TRI); MI->eraseFromParent(); } @@ -1327,8 +1321,7 @@ void TargetInstrInfo::reassociateOps( MachineFunction *MF = Root.getMF(); MachineRegisterInfo &MRI = MF->getRegInfo(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); - const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); - const TargetRegisterClass *RC = Root.getRegClassConstraint(0, TII, TRI); + const TargetRegisterClass *RC = Root.getRegClassConstraint(0, TII, &TRI); MachineOperand &OpA = Prev.getOperand(OperandIndices[1]); MachineOperand &OpB = Root.getOperand(OperandIndices[2]); @@ -1337,9 +1330,12 @@ void TargetInstrInfo::reassociateOps( MachineOperand &OpC = Root.getOperand(0); Register RegA = OpA.getReg(); + unsigned SubRegA = OpA.getSubReg(); Register RegB = OpB.getReg(); Register RegX = OpX.getReg(); + unsigned SubRegX = OpX.getSubReg(); Register RegY = OpY.getReg(); + unsigned SubRegY = OpY.getSubReg(); Register RegC = OpC.getReg(); if (RegA.isVirtual()) @@ -1357,6 +1353,7 @@ void TargetInstrInfo::reassociateOps( // recycling RegB because the MachineCombiner's computation of the critical // path requires a new register definition rather than an existing one. Register NewVR = MRI.createVirtualRegister(RC); + unsigned SubRegNewVR = 0; InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); auto [NewRootOpc, NewPrevOpc] = getReassociationOpcodes(Pattern, Root, Prev); @@ -1369,6 +1366,7 @@ void TargetInstrInfo::reassociateOps( if (SwapPrevOperands) { std::swap(RegX, RegY); + std::swap(SubRegX, SubRegY); std::swap(KillX, KillY); } @@ -1421,9 +1419,9 @@ void TargetInstrInfo::reassociateOps( if (Idx == 0) continue; if (Idx == PrevFirstOpIdx) - MIB1.addReg(RegX, getKillRegState(KillX)); + MIB1.addReg(RegX, getKillRegState(KillX), SubRegX); else if (Idx == PrevSecondOpIdx) - MIB1.addReg(RegY, getKillRegState(KillY)); + MIB1.addReg(RegY, getKillRegState(KillY), SubRegY); else MIB1.add(MO); } @@ -1431,6 +1429,7 @@ void TargetInstrInfo::reassociateOps( if (SwapRootOperands) { std::swap(RegA, NewVR); + std::swap(SubRegA, SubRegNewVR); std::swap(KillA, KillNewVR); } @@ -1442,9 +1441,9 @@ void TargetInstrInfo::reassociateOps( if (Idx == 0) continue; if (Idx == RootFirstOpIdx) - MIB2 = MIB2.addReg(RegA, getKillRegState(KillA)); + MIB2 = MIB2.addReg(RegA, getKillRegState(KillA), SubRegA); else if (Idx == RootSecondOpIdx) - MIB2 = MIB2.addReg(NewVR, getKillRegState(KillNewVR)); + MIB2 = MIB2.addReg(NewVR, getKillRegState(KillNewVR), SubRegNewVR); else MIB2 = MIB2.add(MO); } @@ -1532,6 +1531,7 @@ void TargetInstrInfo::genAlternativeCodeSequence( if (IndexedReg.index() == 0) continue; + // FIXME: Losing subregisters MachineInstr *Instr = MRI.getUniqueVRegDef(IndexedReg.value()); MachineInstrBuilder MIB; Register AccReg; @@ -1704,8 +1704,7 @@ bool TargetInstrInfo::isSchedulingBoundary(const MachineInstr &MI, // stack slot reference to depend on the instruction that does the // modification. 
const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering(); - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - return MI.modifiesRegister(TLI.getStackPointerRegisterToSaveRestore(), TRI); + return MI.modifiesRegister(TLI.getStackPointerRegisterToSaveRestore(), &TRI); } // Provide a global flag for disabling the PreRA hazard recognizer that targets @@ -1738,11 +1737,11 @@ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, // Default implementation of getMemOperandWithOffset. bool TargetInstrInfo::getMemOperandWithOffset( const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, - bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const { + bool &OffsetIsScalable, const TargetRegisterInfo * /*RemoveMe*/) const { SmallVector<const MachineOperand *, 4> BaseOps; LocationSize Width = LocationSize::precise(0); if (!getMemOperandsWithOffsetWidth(MI, BaseOps, Offset, OffsetIsScalable, - Width, TRI) || + Width, &TRI) || BaseOps.size() != 1) return false; BaseOp = BaseOps.front(); @@ -1863,7 +1862,6 @@ std::optional<ParamLoadedValue> TargetInstrInfo::describeLoadedValue(const MachineInstr &MI, Register Reg) const { const MachineFunction *MF = MI.getMF(); - const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); DIExpression *Expr = DIExpression::get(MF->getFunction().getContext(), {}); int64_t Offset; bool OffsetIsScalable; @@ -1894,7 +1892,6 @@ TargetInstrInfo::describeLoadedValue(const MachineInstr &MI, // Only describe memory which provably does not escape the function. As // described in llvm.org/PR43343, escaped memory may be clobbered by the // callee (or by another thread). - const auto &TII = MF->getSubtarget().getInstrInfo(); const MachineFrameInfo &MFI = MF->getFrameInfo(); const MachineMemOperand *MMO = MI.memoperands()[0]; const PseudoSourceValue *PSV = MMO->getPseudoValue(); @@ -1905,8 +1902,7 @@ TargetInstrInfo::describeLoadedValue(const MachineInstr &MI, return std::nullopt; const MachineOperand *BaseOp; - if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, - TRI)) + if (!getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, &TRI)) return std::nullopt; // FIXME: Scalable offsets are not yet handled in the offset code below. @@ -2045,7 +2041,7 @@ bool TargetInstrInfo::getInsertSubregInputs( // Returns a MIRPrinter comment for this machine operand. 
std::string TargetInstrInfo::createMIROperandComment( const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx, - const TargetRegisterInfo *TRI) const { + const TargetRegisterInfo * /*RemoveMe*/) const { if (!MI.isInlineAsm()) return ""; @@ -2078,12 +2074,8 @@ std::string TargetInstrInfo::createMIROperandComment( OS << F.getKindName(); unsigned RCID; - if (!F.isImmKind() && !F.isMemKind() && F.hasRegClassConstraint(RCID)) { - if (TRI) { - OS << ':' << TRI->getRegClassName(TRI->getRegClass(RCID)); - } else - OS << ":RC" << RCID; - } + if (!F.isImmKind() && !F.isMemKind() && F.hasRegClassConstraint(RCID)) + OS << ':' << TRI.getRegClassName(TRI.getRegClass(RCID)); if (F.isMemKind()) { InlineAsm::ConstraintCode MCID = F.getMemoryConstraintID(); diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 1cc591c..814b4b5 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -430,6 +430,24 @@ RTLIB::Libcall RTLIB::getSINCOS(EVT RetVT) { } RTLIB::Libcall RTLIB::getSINCOSPI(EVT RetVT) { + // TODO: Tablegen should generate this function + if (RetVT.isVector()) { + if (!RetVT.isSimple()) + return RTLIB::UNKNOWN_LIBCALL; + switch (RetVT.getSimpleVT().SimpleTy) { + case MVT::v4f32: + return RTLIB::SINCOSPI_V4F32; + case MVT::v2f64: + return RTLIB::SINCOSPI_V2F64; + case MVT::nxv4f32: + return RTLIB::SINCOSPI_NXV4F32; + case MVT::nxv2f64: + return RTLIB::SINCOSPI_NXV2F64; + default: + return RTLIB::UNKNOWN_LIBCALL; + } + } + return getFPLibCall(RetVT, SINCOSPI_F32, SINCOSPI_F64, SINCOSPI_F80, SINCOSPI_F128, SINCOSPI_PPCF128); } diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index b99e1c7..3f2961c 100644 --- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1402,7 +1402,7 @@ bool TwoAddressInstructionImpl::tryInstructionTransform( // Unfold the load. 
LLVM_DEBUG(dbgs() << "2addr: UNFOLDING: " << MI); const TargetRegisterClass *RC = TRI->getAllocatableClass( - TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI)); + TII->getRegClass(UnfoldMCID, LoadRegIndex)); Register Reg = MRI->createVirtualRegister(RC); SmallVector<MachineInstr *, 2> NewMIs; if (!TII->unfoldMemoryOperand(*MF, MI, Reg, diff --git a/llvm/lib/Frontend/Driver/CodeGenOptions.cpp b/llvm/lib/Frontend/Driver/CodeGenOptions.cpp index b546e81..4e16027 100644 --- a/llvm/lib/Frontend/Driver/CodeGenOptions.cpp +++ b/llvm/lib/Frontend/Driver/CodeGenOptions.cpp @@ -8,6 +8,7 @@ #include "llvm/Frontend/Driver/CodeGenOptions.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/SystemLibraries.h" #include "llvm/ProfileData/InstrProfCorrelator.h" #include "llvm/TargetParser/Triple.h" @@ -25,35 +26,35 @@ TargetLibraryInfoImpl *createTLII(const llvm::Triple &TargetTriple, using VectorLibrary = llvm::driver::VectorLibrary; switch (Veclib) { case VectorLibrary::Accelerate: - TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::Accelerate, + TLII->addVectorizableFunctionsFromVecLib(llvm::VectorLibrary::Accelerate, TargetTriple); break; case VectorLibrary::LIBMVEC: - TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::LIBMVEC, + TLII->addVectorizableFunctionsFromVecLib(llvm::VectorLibrary::LIBMVEC, TargetTriple); break; case VectorLibrary::MASSV: - TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::MASSV, + TLII->addVectorizableFunctionsFromVecLib(llvm::VectorLibrary::MASSV, TargetTriple); break; case VectorLibrary::SVML: - TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::SVML, + TLII->addVectorizableFunctionsFromVecLib(llvm::VectorLibrary::SVML, TargetTriple); break; case VectorLibrary::SLEEF: - TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::SLEEFGNUABI, + TLII->addVectorizableFunctionsFromVecLib(llvm::VectorLibrary::SLEEFGNUABI, TargetTriple); break; case VectorLibrary::Darwin_libsystem_m: TLII->addVectorizableFunctionsFromVecLib( - TargetLibraryInfoImpl::DarwinLibSystemM, TargetTriple); + llvm::VectorLibrary::DarwinLibSystemM, TargetTriple); break; case VectorLibrary::ArmPL: - TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::ArmPL, + TLII->addVectorizableFunctionsFromVecLib(llvm::VectorLibrary::ArmPL, TargetTriple); break; case VectorLibrary::AMDLIBM: - TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::AMDLIBM, + TLII->addVectorizableFunctionsFromVecLib(llvm::VectorLibrary::AMDLIBM, TargetTriple); break; default: diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index fff9a81..ac86fa85 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -530,7 +530,13 @@ void OpenMPIRBuilder::getKernelArgsVector(TargetKernelArgs &KernelArgs, auto Int32Ty = Type::getInt32Ty(Builder.getContext()); constexpr size_t MaxDim = 3; Value *ZeroArray = Constant::getNullValue(ArrayType::get(Int32Ty, MaxDim)); - Value *Flags = Builder.getInt64(KernelArgs.HasNoWait); + + Value *HasNoWaitFlag = Builder.getInt64(KernelArgs.HasNoWait); + + Value *DynCGroupMemFallbackFlag = + Builder.getInt64(static_cast<uint64_t>(KernelArgs.DynCGroupMemFallback)); + DynCGroupMemFallbackFlag = Builder.CreateShl(DynCGroupMemFallbackFlag, 2); + Value *Flags = Builder.CreateOr(HasNoWaitFlag, DynCGroupMemFallbackFlag); assert(!KernelArgs.NumTeams.empty() && !KernelArgs.NumThreads.empty()); @@ -559,7 +565,7 @@ void 
OpenMPIRBuilder::getKernelArgsVector(TargetKernelArgs &KernelArgs, Flags, NumTeams3D, NumThreads3D, - KernelArgs.DynCGGroupMem}; + KernelArgs.DynCGroupMem}; } void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) { @@ -8224,7 +8230,8 @@ static void emitTargetCall( OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB, OpenMPIRBuilder::CustomMapperCallbackTy CustomMapperCB, const SmallVector<llvm::OpenMPIRBuilder::DependData> &Dependencies, - bool HasNoWait) { + bool HasNoWait, Value *DynCGroupMem, + OMPDynGroupprivateFallbackType DynCGroupMemFallback) { // Generate a function call to the host fallback implementation of the target // region. This is called by the host when no offload entry was generated for // the target region and when the offloading call fails at runtime. @@ -8360,12 +8367,13 @@ static void emitTargetCall( /*isSigned=*/false) : Builder.getInt64(0); - // TODO: Use correct DynCGGroupMem - Value *DynCGGroupMem = Builder.getInt32(0); + // Request zero groupprivate bytes by default. + if (!DynCGroupMem) + DynCGroupMem = Builder.getInt32(0); - KArgs = OpenMPIRBuilder::TargetKernelArgs(NumTargetItems, RTArgs, TripCount, - NumTeamsC, NumThreadsC, - DynCGGroupMem, HasNoWait); + KArgs = OpenMPIRBuilder::TargetKernelArgs( + NumTargetItems, RTArgs, TripCount, NumTeamsC, NumThreadsC, DynCGroupMem, + HasNoWait, DynCGroupMemFallback); // Assume no error was returned because TaskBodyCB and // EmitTargetCallFallbackCB don't produce any. @@ -8414,7 +8422,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTarget( OpenMPIRBuilder::TargetBodyGenCallbackTy CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB, CustomMapperCallbackTy CustomMapperCB, - const SmallVector<DependData> &Dependencies, bool HasNowait) { + const SmallVector<DependData> &Dependencies, bool HasNowait, + Value *DynCGroupMem, OMPDynGroupprivateFallbackType DynCGroupMemFallback) { if (!updateToLocation(Loc)) return InsertPointTy(); @@ -8437,7 +8446,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTarget( if (!Config.isTargetDevice()) emitTargetCall(*this, Builder, AllocaIP, Info, DefaultAttrs, RuntimeAttrs, IfCond, OutlinedFn, OutlinedFnID, Inputs, GenMapInfoCB, - CustomMapperCB, Dependencies, HasNowait); + CustomMapperCB, Dependencies, HasNowait, DynCGroupMem, + DynCGroupMemFallback); return Builder.saveIP(); } @@ -8460,9 +8470,8 @@ OpenMPIRBuilder::createPlatformSpecificName(ArrayRef<StringRef> Parts) const { Config.separator()); } -GlobalVariable * -OpenMPIRBuilder::getOrCreateInternalVariable(Type *Ty, const StringRef &Name, - unsigned AddressSpace) { +GlobalVariable *OpenMPIRBuilder::getOrCreateInternalVariable( + Type *Ty, const StringRef &Name, std::optional<unsigned> AddressSpace) { auto &Elem = *InternalVars.try_emplace(Name, nullptr).first; if (Elem.second) { assert(Elem.second->getValueType() == Ty && @@ -8472,16 +8481,25 @@ OpenMPIRBuilder::getOrCreateInternalVariable(Type *Ty, const StringRef &Name, // variable for possibly changing that to internal or private, or maybe // create different versions of the function for different OMP internal // variables. + const DataLayout &DL = M.getDataLayout(); + // TODO: Investigate why AMDGPU expects AS 0 for globals even though the + // default global AS is 1. + // See double-target-call-with-declare-target.f90 and + // declare-target-vars-in-target-region.f90 libomptarget + // tests. + unsigned AddressSpaceVal = AddressSpace ? *AddressSpace + : M.getTargetTriple().isAMDGPU() + ? 
0 + : DL.getDefaultGlobalsAddressSpace(); auto Linkage = this->M.getTargetTriple().getArch() == Triple::wasm32 ? GlobalValue::InternalLinkage : GlobalValue::CommonLinkage; auto *GV = new GlobalVariable(M, Ty, /*IsConstant=*/false, Linkage, Constant::getNullValue(Ty), Elem.first(), /*InsertBefore=*/nullptr, - GlobalValue::NotThreadLocal, AddressSpace); - const DataLayout &DL = M.getDataLayout(); + GlobalValue::NotThreadLocal, AddressSpaceVal); const llvm::Align TypeAlign = DL.getABITypeAlign(Ty); - const llvm::Align PtrAlign = DL.getPointerABIAlignment(AddressSpace); + const llvm::Align PtrAlign = DL.getPointerABIAlignment(AddressSpaceVal); GV->setAlignment(std::max(TypeAlign, PtrAlign)); Elem.second = GV; } diff --git a/llvm/lib/IR/CMakeLists.txt b/llvm/lib/IR/CMakeLists.txt index 10572ff..ebdc2ca 100644 --- a/llvm/lib/IR/CMakeLists.txt +++ b/llvm/lib/IR/CMakeLists.txt @@ -67,6 +67,7 @@ add_llvm_component_library(LLVMCore ReplaceConstant.cpp Statepoint.cpp StructuralHash.cpp + SystemLibraries.cpp Type.cpp TypedPointerType.cpp TypeFinder.cpp diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp index f4c5c6f..e66b9ad 100644 --- a/llvm/lib/IR/RuntimeLibcalls.cpp +++ b/llvm/lib/IR/RuntimeLibcalls.cpp @@ -10,6 +10,7 @@ #include "llvm/ADT/FloatingPointMode.h" #include "llvm/ADT/StringTable.h" #include "llvm/IR/Module.h" +#include "llvm/IR/SystemLibraries.h" #include "llvm/Support/Debug.h" #include "llvm/Support/xxhash.h" #include "llvm/TargetParser/ARMTargetParser.h" @@ -25,6 +26,49 @@ using namespace RTLIB; #define DEFINE_GET_LOOKUP_LIBCALL_IMPL_NAME #include "llvm/IR/RuntimeLibcalls.inc" +RuntimeLibcallsInfo::RuntimeLibcallsInfo(const Triple &TT, + ExceptionHandling ExceptionModel, + FloatABI::ABIType FloatABI, + EABI EABIVersion, StringRef ABIName) { + // FIXME: The ExceptionModel parameter is to handle the field in + // TargetOptions. This interface fails to distinguish the forced disable + // case for targets which support exceptions by default. This should + // probably be a module flag and removed from TargetOptions. 
+ if (ExceptionModel == ExceptionHandling::None) + ExceptionModel = TT.getDefaultExceptionHandling(); + + initLibcalls(TT, ExceptionModel, FloatABI, EABIVersion, ABIName); + + // TODO: Tablegen should generate these sets + switch (ClVectorLibrary) { + case VectorLibrary::SLEEFGNUABI: + for (RTLIB::LibcallImpl Impl : + {RTLIB::impl__ZGVnN2vl8l8_sincos, RTLIB::impl__ZGVnN4vl4l4_sincosf, + RTLIB::impl__ZGVsNxvl8l8_sincos, RTLIB::impl__ZGVsNxvl4l4_sincosf, + RTLIB::impl__ZGVnN4vl4l4_sincospif, RTLIB::impl__ZGVnN2vl8l8_sincospi, + RTLIB::impl__ZGVsNxvl4l4_sincospif, + RTLIB::impl__ZGVsNxvl8l8_sincospi}) + setAvailable(Impl); + break; + case VectorLibrary::ArmPL: + for (RTLIB::LibcallImpl Impl : + {RTLIB::impl_armpl_vsincosq_f64, RTLIB::impl_armpl_vsincosq_f32, + RTLIB::impl_armpl_svsincos_f64_x, RTLIB::impl_armpl_svsincos_f32_x, + RTLIB::impl_armpl_vsincospiq_f32, RTLIB::impl_armpl_vsincospiq_f64, + RTLIB::impl_armpl_svsincospi_f32_x, + RTLIB::impl_armpl_svsincospi_f64_x}) + setAvailable(Impl); + + for (RTLIB::LibcallImpl Impl : + {RTLIB::impl_armpl_vsincosq_f64, RTLIB::impl_armpl_vsincosq_f32}) + setLibcallImplCallingConv(Impl, CallingConv::AArch64_VectorCall); + + break; + default: + break; + } +} + RuntimeLibcallsInfo::RuntimeLibcallsInfo(const Module &M) : RuntimeLibcallsInfo(M.getTargetTriple()) { // TODO: Consider module flags @@ -88,6 +132,8 @@ RuntimeLibcallsInfo::getFunctionTy(LLVMContext &Ctx, const Triple &TT, static constexpr Attribute::AttrKind CommonFnAttrs[] = { Attribute::NoCallback, Attribute::NoFree, Attribute::NoSync, Attribute::NoUnwind, Attribute::WillReturn}; + static constexpr Attribute::AttrKind CommonPtrArgAttrs[] = { + Attribute::NoAlias, Attribute::WriteOnly, Attribute::NonNull}; switch (LibcallImpl) { case RTLIB::impl___sincos_stret: @@ -151,9 +197,86 @@ RuntimeLibcallsInfo::getFunctionTy(LLVMContext &Ctx, const Triple &TT, fcNegNormal)); return {FuncTy, Attrs}; } + case RTLIB::impl__ZGVnN2vl8l8_sincos: + case RTLIB::impl__ZGVnN4vl4l4_sincosf: + case RTLIB::impl__ZGVsNxvl8l8_sincos: + case RTLIB::impl__ZGVsNxvl4l4_sincosf: + case RTLIB::impl_armpl_vsincosq_f64: + case RTLIB::impl_armpl_vsincosq_f32: + case RTLIB::impl_armpl_svsincos_f64_x: + case RTLIB::impl_armpl_svsincos_f32_x: + case RTLIB::impl__ZGVnN4vl4l4_sincospif: + case RTLIB::impl__ZGVnN2vl8l8_sincospi: + case RTLIB::impl__ZGVsNxvl4l4_sincospif: + case RTLIB::impl__ZGVsNxvl8l8_sincospi: + case RTLIB::impl_armpl_vsincospiq_f32: + case RTLIB::impl_armpl_vsincospiq_f64: + case RTLIB::impl_armpl_svsincospi_f32_x: + case RTLIB::impl_armpl_svsincospi_f64_x: { + AttrBuilder FuncAttrBuilder(Ctx); + + bool IsF32 = LibcallImpl == RTLIB::impl__ZGVnN4vl4l4_sincospif || + LibcallImpl == RTLIB::impl__ZGVsNxvl4l4_sincospif || + LibcallImpl == RTLIB::impl_armpl_vsincospiq_f32 || + LibcallImpl == RTLIB::impl_armpl_svsincospi_f32_x || + LibcallImpl == RTLIB::impl__ZGVnN4vl4l4_sincosf || + LibcallImpl == RTLIB::impl__ZGVsNxvl4l4_sincosf || + LibcallImpl == RTLIB::impl_armpl_vsincosq_f32 || + LibcallImpl == RTLIB::impl_armpl_svsincos_f32_x; + + Type *ScalarTy = IsF32 ? Type::getFloatTy(Ctx) : Type::getDoubleTy(Ctx); + unsigned EC = IsF32 ? 
4 : 2; + + bool IsScalable = LibcallImpl == RTLIB::impl__ZGVsNxvl8l8_sincos || + LibcallImpl == RTLIB::impl__ZGVsNxvl4l4_sincosf || + LibcallImpl == RTLIB::impl_armpl_svsincos_f32_x || + LibcallImpl == RTLIB::impl_armpl_svsincos_f64_x || + LibcallImpl == RTLIB::impl__ZGVsNxvl4l4_sincospif || + LibcallImpl == RTLIB::impl__ZGVsNxvl8l8_sincospi || + LibcallImpl == RTLIB::impl_armpl_svsincospi_f32_x || + LibcallImpl == RTLIB::impl_armpl_svsincospi_f64_x; + VectorType *VecTy = VectorType::get(ScalarTy, EC, IsScalable); + + for (Attribute::AttrKind Attr : CommonFnAttrs) + FuncAttrBuilder.addAttribute(Attr); + FuncAttrBuilder.addMemoryAttr(MemoryEffects::argMemOnly(ModRefInfo::Mod)); + + AttributeList Attrs; + Attrs = Attrs.addFnAttributes(Ctx, FuncAttrBuilder); + + { + AttrBuilder ArgAttrBuilder(Ctx); + for (Attribute::AttrKind AK : CommonPtrArgAttrs) + ArgAttrBuilder.addAttribute(AK); + ArgAttrBuilder.addAlignmentAttr(DL.getABITypeAlign(VecTy)); + Attrs = Attrs.addParamAttributes(Ctx, 1, ArgAttrBuilder); + Attrs = Attrs.addParamAttributes(Ctx, 2, ArgAttrBuilder); + } + + PointerType *PtrTy = PointerType::get(Ctx, 0); + SmallVector<Type *, 4> ArgTys = {VecTy, PtrTy, PtrTy}; + if (hasVectorMaskArgument(LibcallImpl)) + ArgTys.push_back(VectorType::get(Type::getInt1Ty(Ctx), EC, IsScalable)); + + return {FunctionType::get(Type::getVoidTy(Ctx), ArgTys, false), Attrs}; + } default: return {}; } return {}; } + +bool RuntimeLibcallsInfo::hasVectorMaskArgument(RTLIB::LibcallImpl Impl) { + /// FIXME: This should be generated by tablegen and support the argument at an + /// arbitrary position + switch (Impl) { + case RTLIB::impl_armpl_svsincos_f32_x: + case RTLIB::impl_armpl_svsincos_f64_x: + case RTLIB::impl_armpl_svsincospi_f32_x: + case RTLIB::impl_armpl_svsincospi_f64_x: + return true; + default: + return false; + } +} diff --git a/llvm/lib/IR/SystemLibraries.cpp b/llvm/lib/IR/SystemLibraries.cpp new file mode 100644 index 0000000..fa4ac2a --- /dev/null +++ b/llvm/lib/IR/SystemLibraries.cpp @@ -0,0 +1,34 @@ +//===-----------------------------------------------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/SystemLibraries.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +VectorLibrary llvm::ClVectorLibrary; + +static cl::opt<VectorLibrary, true> ClVectorLibraryOpt( + "vector-library", cl::Hidden, cl::desc("Vector functions library"), + cl::location(llvm::ClVectorLibrary), cl::init(VectorLibrary::NoLibrary), + cl::values( + clEnumValN(VectorLibrary::NoLibrary, "none", + "No vector functions library"), + clEnumValN(VectorLibrary::Accelerate, "Accelerate", + "Accelerate framework"), + clEnumValN(VectorLibrary::DarwinLibSystemM, "Darwin_libsystem_m", + "Darwin libsystem_m"), + clEnumValN(VectorLibrary::LIBMVEC, "LIBMVEC", + "GLIBC Vector Math library"), + clEnumValN(VectorLibrary::MASSV, "MASSV", "IBM MASS vector library"), + clEnumValN(VectorLibrary::SVML, "SVML", "Intel SVML library"), + clEnumValN(VectorLibrary::SLEEFGNUABI, "sleefgnuabi", + "SIMD Library for Evaluating Elementary Functions"), + clEnumValN(VectorLibrary::ArmPL, "ArmPL", "Arm Performance Libraries"), + clEnumValN(VectorLibrary::AMDLIBM, "AMDLIBM", + "AMD vector math library"))); diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index e0babc4..0d190ea 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -45,7 +45,6 @@ #include "llvm/Analysis/IR2Vec.h" #include "llvm/Analysis/IVUsers.h" #include "llvm/Analysis/InlineAdvisor.h" -#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h" #include "llvm/Analysis/InstCount.h" #include "llvm/Analysis/KernelInfo.h" #include "llvm/Analysis/LastRunTrackingAnalysis.h" diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 2fe963b..dd73c04 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -228,7 +228,7 @@ static cl::opt<bool> EnableLoopHeaderDuplication( static cl::opt<bool> EnableDFAJumpThreading("enable-dfa-jump-thread", cl::desc("Enable DFA jump threading"), - cl::init(true), cl::Hidden); + cl::init(false), cl::Hidden); static cl::opt<bool> EnableHotColdSplit("hot-cold-split", diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index d8305fe5..074c328 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -359,7 +359,6 @@ FUNCTION_ANALYSIS("ephemerals", EphemeralValuesAnalysis()) FUNCTION_ANALYSIS("func-properties", FunctionPropertiesAnalysis()) FUNCTION_ANALYSIS("machine-function-info", MachineFunctionAnalysis(*TM)) FUNCTION_ANALYSIS("gc-function", GCFunctionAnalysis()) -FUNCTION_ANALYSIS("inliner-size-estimator", InlineSizeEstimatorAnalysis()) FUNCTION_ANALYSIS("last-run-tracking", LastRunTrackingAnalysis()) FUNCTION_ANALYSIS("lazy-value-info", LazyValueAnalysis()) FUNCTION_ANALYSIS("loops", LoopAnalysis()) @@ -515,8 +514,6 @@ FUNCTION_PASS("print<domfrontier>", DominanceFrontierPrinterPass(errs())) FUNCTION_PASS("print<domtree>", DominatorTreePrinterPass(errs())) FUNCTION_PASS("print<func-properties>", FunctionPropertiesPrinterPass(errs())) FUNCTION_PASS("print<inline-cost>", InlineCostAnnotationPrinterPass(errs())) -FUNCTION_PASS("print<inliner-size-estimator>", - InlineSizeEstimatorAnalysisPrinterPass(errs())) FUNCTION_PASS("print<lazy-value-info>", LazyValueInfoPrinterPass(errs())) FUNCTION_PASS("print<loops>", LoopPrinterPass(errs())) FUNCTION_PASS("print<memoryssa-walker>", 
MemorySSAWalkerPrinterPass(errs())) diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp index 246d90c..91f98cf 100644 --- a/llvm/lib/Support/SpecialCaseList.cpp +++ b/llvm/lib/Support/SpecialCaseList.cpp @@ -14,24 +14,94 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/SpecialCaseList.h" +#include "llvm/ADT/RadixTree.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Support/GlobPattern.h" #include "llvm/Support/LineIterator.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Regex.h" #include "llvm/Support/VirtualFileSystem.h" -#include <algorithm> -#include <limits> +#include "llvm/Support/raw_ostream.h" #include <memory> #include <stdio.h> #include <string> #include <system_error> #include <utility> +#include <variant> +#include <vector> namespace llvm { -Error SpecialCaseList::RegexMatcher::insert(StringRef Pattern, - unsigned LineNumber) { +namespace { + +using Match = std::pair<StringRef, unsigned>; +static constexpr Match NotMatched = {"", 0}; + +// Legacy v1 matcher. +class RegexMatcher { +public: + Error insert(StringRef Pattern, unsigned LineNumber); + void preprocess(bool BySize); + + Match match(StringRef Query) const; + + struct Reg { + Reg(StringRef Name, unsigned LineNo, Regex &&Rg) + : Name(Name), LineNo(LineNo), Rg(std::move(Rg)) {} + StringRef Name; + unsigned LineNo; + Regex Rg; + }; + + std::vector<Reg> RegExes; +}; + +class GlobMatcher { +public: + Error insert(StringRef Pattern, unsigned LineNumber); + void preprocess(bool BySize); + + Match match(StringRef Query) const; + + struct Glob { + Glob(StringRef Name, unsigned LineNo, GlobPattern &&Pattern) + : Name(Name), LineNo(LineNo), Pattern(std::move(Pattern)) {} + StringRef Name; + unsigned LineNo; + GlobPattern Pattern; + }; + + std::vector<GlobMatcher::Glob> Globs; + + RadixTree<iterator_range<StringRef::const_iterator>, + RadixTree<iterator_range<StringRef::const_reverse_iterator>, + SmallVector<int, 1>>> + PrefixSuffixToGlob; + + RadixTree<iterator_range<StringRef::const_iterator>, SmallVector<int, 1>> + SubstrToGlob; +}; + +/// Represents a set of patterns and their line numbers +class Matcher { +public: + Matcher(bool UseGlobs, bool RemoveDotSlash); + + Error insert(StringRef Pattern, unsigned LineNumber); + void preprocess(bool BySize); + Match match(StringRef Query) const; + + bool matchAny(StringRef Query) const { return match(Query).second > 0; } + + std::variant<RegexMatcher, GlobMatcher> M; + bool RemoveDotSlash; +}; + +Error RegexMatcher::insert(StringRef Pattern, unsigned LineNumber) { if (Pattern.empty()) return createStringError(errc::invalid_argument, "Supplied regex was blank"); @@ -55,7 +125,7 @@ Error SpecialCaseList::RegexMatcher::insert(StringRef Pattern, return Error::success(); } -void SpecialCaseList::RegexMatcher::preprocess(bool BySize) { +void RegexMatcher::preprocess(bool BySize) { if (BySize) { llvm::stable_sort(RegExes, [](const Reg &A, const Reg &B) { return A.Name.size() < B.Name.size(); @@ -63,16 +133,14 @@ void SpecialCaseList::RegexMatcher::preprocess(bool BySize) { } } -void SpecialCaseList::RegexMatcher::match( - StringRef Query, - llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const { +Match RegexMatcher::match(StringRef Query) const { for (const auto &R : reverse(RegExes)) if
(R.Rg.match(Query)) - return Cb(R.Name, R.LineNo); + return {R.Name, R.LineNo}; + return NotMatched; } -Error SpecialCaseList::GlobMatcher::insert(StringRef Pattern, - unsigned LineNumber) { +Error GlobMatcher::insert(StringRef Pattern, unsigned LineNumber) { if (Pattern.empty()) return createStringError(errc::invalid_argument, "Supplied glob was blank"); @@ -83,14 +151,14 @@ Error SpecialCaseList::GlobMatcher::insert(StringRef Pattern, return Error::success(); } -void SpecialCaseList::GlobMatcher::preprocess(bool BySize) { +void GlobMatcher::preprocess(bool BySize) { if (BySize) { llvm::stable_sort(Globs, [](const Glob &A, const Glob &B) { return A.Name.size() < B.Name.size(); }); } - for (const auto &G : reverse(Globs)) { + for (const auto &[Idx, G] : enumerate(Globs)) { StringRef Prefix = G.Pattern.prefix(); StringRef Suffix = G.Pattern.suffix(); @@ -102,26 +170,28 @@ void SpecialCaseList::GlobMatcher::preprocess(bool BySize) { // But only if substring is not empty. Searching this tree is more // expensive. auto &V = SubstrToGlob.emplace(Substr).first->second; - V.emplace_back(&G); + V.emplace_back(Idx); continue; } } auto &SToGlob = PrefixSuffixToGlob.emplace(Prefix).first->second; auto &V = SToGlob.emplace(reverse(Suffix)).first->second; - V.emplace_back(&G); + V.emplace_back(Idx); } } -void SpecialCaseList::GlobMatcher::match( - StringRef Query, - llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const { +Match GlobMatcher::match(StringRef Query) const { + int Best = -1; if (!PrefixSuffixToGlob.empty()) { for (const auto &[_, SToGlob] : PrefixSuffixToGlob.find_prefixes(Query)) { for (const auto &[_, V] : SToGlob.find_prefixes(reverse(Query))) { - for (const auto *G : V) { - if (G->Pattern.match(Query)) { - Cb(G->Name, G->LineNo); + for (int Idx : reverse(V)) { + if (Best > Idx) + break; + const GlobMatcher::Glob &G = Globs[Idx]; + if (G.Pattern.match(Query)) { + Best = Idx; // As soon as we find a match in the vector, we can break for this // vector, since the globs are already sorted by priority within the // prefix group. However, we continue searching other prefix groups @@ -138,9 +208,12 @@ void SpecialCaseList::GlobMatcher::match( // possibilities. In most cases search will fail on first characters. for (StringRef Q = Query; !Q.empty(); Q = Q.drop_front()) { for (const auto &[_, V] : SubstrToGlob.find_prefixes(Q)) { - for (const auto *G : V) { - if (G->Pattern.match(Query)) { - Cb(G->Name, G->LineNo); + for (int Idx : reverse(V)) { + if (Best > Idx) + break; + const GlobMatcher::Glob &G = Globs[Idx]; + if (G.Pattern.match(Query)) { + Best = Idx; // As soon as we find a match in the vector, we can break for this // vector, since the globs are already sorted by priority within the // prefix group. 
However, we continue searching other prefix groups @@ -151,9 +224,12 @@ void SpecialCaseList::GlobMatcher::match( } } } + if (Best < 0) + return NotMatched; + return {Globs[Best].Name, Globs[Best].LineNo}; } -SpecialCaseList::Matcher::Matcher(bool UseGlobs, bool RemoveDotSlash) +Matcher::Matcher(bool UseGlobs, bool RemoveDotSlash) : RemoveDotSlash(RemoveDotSlash) { if (UseGlobs) M.emplace<GlobMatcher>(); @@ -161,21 +237,34 @@ SpecialCaseList::Matcher::Matcher(bool UseGlobs, bool RemoveDotSlash) M.emplace<RegexMatcher>(); } -Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber) { +Error Matcher::insert(StringRef Pattern, unsigned LineNumber) { return std::visit([&](auto &V) { return V.insert(Pattern, LineNumber); }, M); } -void SpecialCaseList::Matcher::preprocess(bool BySize) { +void Matcher::preprocess(bool BySize) { return std::visit([&](auto &V) { return V.preprocess(BySize); }, M); } -void SpecialCaseList::Matcher::match( - StringRef Query, - llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const { +Match Matcher::match(StringRef Query) const { if (RemoveDotSlash) Query = llvm::sys::path::remove_leading_dotslash(Query); - return std::visit([&](auto &V) { return V.match(Query, Cb); }, M); + return std::visit([&](auto &V) -> Match { return V.match(Query); }, M); } +} // namespace + +class SpecialCaseList::Section::SectionImpl { +public: + void preprocess(bool OrderBySize); + const Matcher *findMatcher(StringRef Prefix, StringRef Category) const; + + using SectionEntries = StringMap<StringMap<Matcher>>; + + explicit SectionImpl(bool UseGlobs) + : SectionMatcher(UseGlobs, /*RemoveDotSlash=*/false) {} + + Matcher SectionMatcher; + SectionEntries Entries; +}; // TODO: Refactor this to return Expected<...> std::unique_ptr<SpecialCaseList> @@ -233,11 +322,11 @@ bool SpecialCaseList::createInternal(const MemoryBuffer *MB, std::string &Error, Expected<SpecialCaseList::Section *> SpecialCaseList::addSection(StringRef SectionStr, unsigned FileNo, unsigned LineNo, bool UseGlobs) { + SectionStr = SectionStr.copy(StrAlloc); Sections.emplace_back(SectionStr, FileNo, UseGlobs); auto &Section = Sections.back(); - SectionStr = SectionStr.copy(StrAlloc); - if (auto Err = Section.SectionMatcher.insert(SectionStr, LineNo)) { + if (auto Err = Section.Impl->SectionMatcher.insert(SectionStr, LineNo)) { return createStringError(errc::invalid_argument, "malformed section at line " + Twine(LineNo) + ": '" + SectionStr + @@ -264,11 +353,12 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB, bool RemoveDotSlash = Version > 2; - Section *CurrentSection; - if (auto Err = addSection("*", FileIdx, 1, true).moveInto(CurrentSection)) { + auto ErrOrSection = addSection("*", FileIdx, 1, true); + if (auto Err = ErrOrSection.takeError()) { Error = toString(std::move(Err)); return false; } + Section::SectionImpl *CurrentImpl = ErrOrSection.get()->Impl.get(); // This is the current list of prefixes for all existing users matching file // path. We may need parametrization in constructor in future. 
@@ -290,12 +380,13 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB, return false; } - if (auto Err = addSection(Line.drop_front().drop_back(), FileIdx, LineNo, - UseGlobs) - .moveInto(CurrentSection)) { + auto ErrOrSection = + addSection(Line.drop_front().drop_back(), FileIdx, LineNo, UseGlobs); + if (auto Err = ErrOrSection.takeError()) { Error = toString(std::move(Err)); return false; } + CurrentImpl = ErrOrSection.get()->Impl.get(); continue; } @@ -308,7 +399,7 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB, } auto [Pattern, Category] = Postfix.split("="); - auto [It, _] = CurrentSection->Entries[Prefix].try_emplace( + auto [It, _] = CurrentImpl->Entries[Prefix].try_emplace( Category, UseGlobs, RemoveDotSlash && llvm::is_contained(PathPrefixes, Prefix)); Pattern = Pattern.copy(StrAlloc); @@ -322,7 +413,7 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB, } for (Section &S : Sections) - S.preprocess(OrderBySize); + S.Impl->preprocess(OrderBySize); return true; } @@ -339,7 +430,7 @@ std::pair<unsigned, unsigned> SpecialCaseList::inSectionBlame(StringRef Section, StringRef Prefix, StringRef Query, StringRef Category) const { for (const auto &S : reverse(Sections)) { - if (S.SectionMatcher.matchAny(Section)) { + if (S.Impl->SectionMatcher.matchAny(Section)) { unsigned Blame = S.getLastMatch(Prefix, Query, Category); if (Blame) return {S.FileIdx, Blame}; @@ -348,9 +439,22 @@ SpecialCaseList::inSectionBlame(StringRef Section, StringRef Prefix, return NotFound; } -const SpecialCaseList::Matcher * -SpecialCaseList::Section::findMatcher(StringRef Prefix, - StringRef Category) const { +SpecialCaseList::Section::Section(StringRef Str, unsigned FileIdx, + bool UseGlobs) + : Name(Str), FileIdx(FileIdx), + Impl(std::make_unique<SectionImpl>(UseGlobs)) {} + +SpecialCaseList::Section::Section(Section &&) = default; + +SpecialCaseList::Section::~Section() = default; + +bool SpecialCaseList::Section::matchName(StringRef Name) const { + return Impl->SectionMatcher.matchAny(Name); +} + +const Matcher * +SpecialCaseList::Section::SectionImpl::findMatcher(StringRef Prefix, + StringRef Category) const { SectionEntries::const_iterator I = Entries.find(Prefix); if (I == Entries.end()) return nullptr; @@ -361,7 +465,7 @@ SpecialCaseList::Section::findMatcher(StringRef Prefix, return &II->second; } -LLVM_ABI void SpecialCaseList::Section::preprocess(bool OrderBySize) { +void SpecialCaseList::Section::SectionImpl::preprocess(bool OrderBySize) { SectionMatcher.preprocess(false); for (auto &[K1, E] : Entries) for (auto &[K2, M] : E) @@ -371,26 +475,21 @@ LLVM_ABI void SpecialCaseList::Section::preprocess(bool OrderBySize) { unsigned SpecialCaseList::Section::getLastMatch(StringRef Prefix, StringRef Query, StringRef Category) const { - unsigned LastLine = 0; - if (const Matcher *M = findMatcher(Prefix, Category)) { - M->match(Query, [&](StringRef, unsigned LineNo) { - LastLine = std::max(LastLine, LineNo); - }); - } - return LastLine; + if (const Matcher *M = Impl->findMatcher(Prefix, Category)) + return M->match(Query).second; + return 0; } StringRef SpecialCaseList::Section::getLongestMatch(StringRef Prefix, StringRef Query, StringRef Category) const { - StringRef LongestRule; - if (const Matcher *M = findMatcher(Prefix, Category)) { - M->match(Query, [&](StringRef Rule, unsigned) { - if (LongestRule.size() < Rule.size()) - LongestRule = Rule; - }); - } - return LongestRule; + if (const Matcher *M = Impl->findMatcher(Prefix, Category)) + return 
M->match(Query).first; + return {}; +} + +bool SpecialCaseList::Section::hasPrefix(StringRef Prefix) const { + return Impl->Entries.find(Prefix) != Impl->Entries.end(); } } // namespace llvm diff --git a/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp index cb831963..7712d2a 100644 --- a/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp +++ b/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp @@ -629,8 +629,7 @@ void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) { } const MCInstrDesc &MCID = TII->get(Opc); // Create a dummy virtual register for the SUBS def. - Register DestReg = - MRI->createVirtualRegister(TII->getRegClass(MCID, 0, TRI)); + Register DestReg = MRI->createVirtualRegister(TII->getRegClass(MCID, 0)); // Insert a SUBS Rn, #0 instruction instead of the cbz / cbnz. BuildMI(*Head, Head->end(), TermDL, MCID) .addReg(DestReg, RegState::Define | RegState::Dead) @@ -638,8 +637,7 @@ void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) { .addImm(0) .addImm(0); // SUBS uses the GPR*sp register classes. - MRI->constrainRegClass(HeadCond[2].getReg(), - TII->getRegClass(MCID, 1, TRI)); + MRI->constrainRegClass(HeadCond[2].getReg(), TII->getRegClass(MCID, 1)); } Head->splice(Head->end(), CmpBB, CmpBB->begin(), CmpBB->end()); @@ -686,10 +684,10 @@ void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) { unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(CmpBBTailCC); const MCInstrDesc &MCID = TII->get(Opc); MRI->constrainRegClass(CmpMI->getOperand(FirstOp).getReg(), - TII->getRegClass(MCID, 0, TRI)); + TII->getRegClass(MCID, 0)); if (CmpMI->getOperand(FirstOp + 1).isReg()) MRI->constrainRegClass(CmpMI->getOperand(FirstOp + 1).getReg(), - TII->getRegClass(MCID, 1, TRI)); + TII->getRegClass(MCID, 1)); MachineInstrBuilder MIB = BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), MCID) .add(CmpMI->getOperand(FirstOp)); // Register Rn if (isZBranch) diff --git a/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp b/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp index 75361f5..4ff49a6 100644 --- a/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp +++ b/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp @@ -156,7 +156,7 @@ void AArch64DeadRegisterDefinitions::processMachineBasicBlock( LLVM_DEBUG(dbgs() << " Ignoring, def is tied operand.\n"); continue; } - const TargetRegisterClass *RC = TII->getRegClass(Desc, I, TRI); + const TargetRegisterClass *RC = TII->getRegClass(Desc, I); unsigned NewReg; if (RC == nullptr) { LLVM_DEBUG(dbgs() << " Ignoring, register is not a GPR.\n"); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 4b40733..b93e562 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -91,7 +91,7 @@ static cl::opt<unsigned> GatherOptSearchLimit( "machine-combiner gather pattern optimization")); AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI) - : AArch64GenInstrInfo(STI, AArch64::ADJCALLSTACKDOWN, + : AArch64GenInstrInfo(STI, RI, AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP, AArch64::CATCHRET), RI(STI.getTargetTriple(), STI.getHwMode()), Subtarget(STI) {} @@ -1780,6 +1780,16 @@ static unsigned sForm(MachineInstr &Instr) { case AArch64::SUBSWri: case AArch64::SUBSXrr: case AArch64::SUBSXri: + case AArch64::ANDSWri: + case AArch64::ANDSWrr: + case 
AArch64::ANDSWrs: + case AArch64::ANDSXri: + case AArch64::ANDSXrr: + case AArch64::ANDSXrs: + case AArch64::BICSWrr: + case AArch64::BICSXrr: + case AArch64::BICSWrs: + case AArch64::BICSXrs: return Instr.getOpcode(); case AArch64::ADDWrr: @@ -1810,6 +1820,22 @@ static unsigned sForm(MachineInstr &Instr) { return AArch64::ANDSWri; case AArch64::ANDXri: return AArch64::ANDSXri; + case AArch64::ANDWrr: + return AArch64::ANDSWrr; + case AArch64::ANDWrs: + return AArch64::ANDSWrs; + case AArch64::ANDXrr: + return AArch64::ANDSXrr; + case AArch64::ANDXrs: + return AArch64::ANDSXrs; + case AArch64::BICWrr: + return AArch64::BICSWrr; + case AArch64::BICXrr: + return AArch64::BICSXrr; + case AArch64::BICWrs: + return AArch64::BICSWrs; + case AArch64::BICXrs: + return AArch64::BICSXrs; } } @@ -1947,6 +1973,25 @@ static bool isSUBSRegImm(unsigned Opcode) { return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri; } +static bool isANDOpcode(MachineInstr &MI) { + unsigned Opc = sForm(MI); + switch (Opc) { + case AArch64::ANDSWri: + case AArch64::ANDSWrr: + case AArch64::ANDSWrs: + case AArch64::ANDSXri: + case AArch64::ANDSXrr: + case AArch64::ANDSXrs: + case AArch64::BICSWrr: + case AArch64::BICSXrr: + case AArch64::BICSWrs: + case AArch64::BICSXrs: + return true; + default: + return false; + } +} + /// Check if CmpInstr can be substituted by MI. /// /// CmpInstr can be substituted: @@ -1984,7 +2029,8 @@ static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr, // 1) MI and CmpInstr set N and V to the same value. // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when // signed overflow occurs, so CmpInstr could still be simplified away. - if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap)) + // Note that Ands and Bics instructions always clear the V flag. 
+ if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap) && !isANDOpcode(MI)) return false; AccessKind AccessToCheck = AK_Write; @@ -5618,7 +5664,6 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FI, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags) const { MachineFunction &MF = *MBB.getParent(); @@ -5632,7 +5677,7 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, bool Offset = true; MCRegister PNRReg = MCRegister::NoRegister; unsigned StackID = TargetStackID::Default; - switch (TRI->getSpillSize(*RC)) { + switch (RI.getSpillSize(*RC)) { case 1: if (AArch64::FPR8RegClass.hasSubClassEq(RC)) Opc = AArch64::STRBui; @@ -5795,10 +5840,12 @@ static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI, .addMemOperand(MMO); } -void AArch64InstrInfo::loadRegFromStackSlot( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, - int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, - Register VReg, MachineInstr::MIFlag Flags) const { +void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + Register DestReg, int FI, + const TargetRegisterClass *RC, + Register VReg, + MachineInstr::MIFlag Flags) const { MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = MF.getFrameInfo(); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); @@ -5810,7 +5857,7 @@ void AArch64InstrInfo::loadRegFromStackSlot( bool Offset = true; unsigned StackID = TargetStackID::Default; Register PNRReg = MCRegister::NoRegister; - switch (TRI->getSpillSize(*RC)) { + switch (TRI.getSpillSize(*RC)) { case 1: if (AArch64::FPR8RegClass.hasSubClassEq(RC)) Opc = AArch64::LDRBui; @@ -6446,10 +6493,10 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( "Mismatched register size in non subreg COPY"); if (IsSpill) storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex, - getRegClass(SrcReg), &TRI, Register()); + getRegClass(SrcReg), Register()); else loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, - getRegClass(DstReg), &TRI, Register()); + getRegClass(DstReg), Register()); return &*--InsertPt; } @@ -6467,8 +6514,7 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( assert(SrcMO.getSubReg() == 0 && "Unexpected subreg on physical register"); storeRegToStackSlot(MBB, InsertPt, AArch64::XZR, SrcMO.isKill(), - FrameIndex, &AArch64::GPR64RegClass, &TRI, - Register()); + FrameIndex, &AArch64::GPR64RegClass, Register()); return &*--InsertPt; } @@ -6502,7 +6548,7 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) == TRI.getRegSizeInBits(*FillRC) && "Mismatched regclass size on folded subreg COPY"); - loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI, + loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, Register()); MachineInstr &LoadMI = *--InsertPt; MachineOperand &LoadDst = LoadMI.getOperand(0); @@ -9590,6 +9636,27 @@ AArch64InstrInfo::getOutliningCandidateInfo( unsigned NumBytesToCreateFrame = 0; + // Avoid splitting an ADRP ADD/LDR pair into outlined functions. + // These instructions are fused together by the scheduler. + // Any candidate where ADRP is the last instruction should be rejected + // as that will lead to splitting the ADRP pair.
+ MachineInstr &LastMI = RepeatedSequenceLocs[0].back(); + MachineInstr &FirstMI = RepeatedSequenceLocs[0].front(); + if (LastMI.getOpcode() == AArch64::ADRP && + (LastMI.getOperand(1).getTargetFlags() & AArch64II::MO_PAGE) != 0 && + (LastMI.getOperand(1).getTargetFlags() & AArch64II::MO_GOT) != 0) { + return std::nullopt; + } + + // Similarly any candidate where the first instruction is ADD/LDR with a + // page offset should be rejected to avoid ADRP splitting. + if ((FirstMI.getOpcode() == AArch64::ADDXri || + FirstMI.getOpcode() == AArch64::LDRXui) && + (FirstMI.getOperand(2).getTargetFlags() & AArch64II::MO_PAGEOFF) != 0 && + (FirstMI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) != 0) { + return std::nullopt; + } + // We only allow outlining for functions having exactly matching return // address signing attributes, i.e., all share the same value for the // attribute "sign-return-address" and all share the same type of key they @@ -10996,8 +11063,6 @@ static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum, MachineBasicBlock::iterator InsertTo) { MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo(); - const TargetRegisterInfo *TRI = - MBB.getParent()->getSubtarget().getRegisterInfo(); MachineInstr *NewMI = MBB.getParent()->CloneMachineInstr(MI); Register Result = 0; for (unsigned I = 0; I < NewMI->getNumOperands(); ++I) { @@ -11006,8 +11071,7 @@ static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum, MRI.getRegClass(NewMI->getOperand(0).getReg())); NewMI->getOperand(I).setReg(Result); } else if (I == ReplaceOprNum) { - MRI.constrainRegClass(ReplaceReg, - TII->getRegClass(NewMI->getDesc(), I, TRI)); + MRI.constrainRegClass(ReplaceReg, TII->getRegClass(NewMI->getDesc(), I)); NewMI->getOperand(I).setReg(ReplaceReg); } } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 179574a..979c9ac 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -353,14 +353,13 @@ public: void storeRegToStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, - bool isKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, + bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; void loadRegFromStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, + Register VReg, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; // This tells target independent code that it is okay to pass instructions diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp index 04e76c7..d25db89 100644 --- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp +++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp @@ -595,17 +595,17 @@ bool AArch64MIPeepholeOpt::splitTwoPartImm( // Determine register classes for destinations and register operands const TargetRegisterClass *FirstInstrDstRC = - TII->getRegClass(TII->get(Opcode.first), 0, TRI); + TII->getRegClass(TII->get(Opcode.first), 0); const TargetRegisterClass *FirstInstrOperandRC = - TII->getRegClass(TII->get(Opcode.first), 1, TRI); + TII->getRegClass(TII->get(Opcode.first), 1); const TargetRegisterClass 
*SecondInstrDstRC = (Opcode.first == Opcode.second) ? FirstInstrDstRC - : TII->getRegClass(TII->get(Opcode.second), 0, TRI); + : TII->getRegClass(TII->get(Opcode.second), 0); const TargetRegisterClass *SecondInstrOperandRC = (Opcode.first == Opcode.second) ? FirstInstrOperandRC - : TII->getRegClass(TII->get(Opcode.second), 1, TRI); + : TII->getRegClass(TII->get(Opcode.second), 1); // Get old registers destinations and new register destinations Register DstReg = MI.getOperand(0).getReg(); @@ -784,14 +784,14 @@ bool AArch64MIPeepholeOpt::visitUBFMXri(MachineInstr &MI) { } const TargetRegisterClass *DstRC64 = - TII->getRegClass(TII->get(MI.getOpcode()), 0, TRI); + TII->getRegClass(TII->get(MI.getOpcode()), 0); const TargetRegisterClass *DstRC32 = TRI->getSubRegisterClass(DstRC64, AArch64::sub_32); assert(DstRC32 && "Destination register class of UBFMXri doesn't have a " "sub_32 subregister class"); const TargetRegisterClass *SrcRC64 = - TII->getRegClass(TII->get(MI.getOpcode()), 1, TRI); + TII->getRegClass(TII->get(MI.getOpcode()), 1); const TargetRegisterClass *SrcRC32 = TRI->getSubRegisterClass(SrcRC64, AArch64::sub_32); assert(SrcRC32 && "Source register class of UBFMXri doesn't have a sub_32 " diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index a5048b9..f3cf222 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -897,7 +897,7 @@ AArch64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, const MCInstrDesc &MCID = TII->get(AArch64::ADDXri); MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); Register BaseReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass); - MRI.constrainRegClass(BaseReg, TII->getRegClass(MCID, 0, this)); + MRI.constrainRegClass(BaseReg, TII->getRegClass(MCID, 0)); unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0); BuildMI(*MBB, Ins, DL, MCID, BaseReg) @@ -1123,24 +1123,85 @@ unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, } } -// FORM_TRANSPOSED_REG_TUPLE nodes are created to improve register allocation -// where a consecutive multi-vector tuple is constructed from the same indices -// of multiple strided loads. This may still result in unnecessary copies -// between the loads and the tuple. Here we try to return a hint to assign the -// contiguous ZPRMulReg starting at the same register as the first operand of -// the pseudo, which should be a subregister of the first strided load. +// We add regalloc hints for different cases: +// * Choosing a better destination operand for predicated SVE instructions +// where the inactive lanes are undef, by choosing a register that is not +// unique to the other operands of the instruction. // -// For example, if the first strided load has been assigned $z16_z20_z24_z28 -// and the operands of the pseudo are each accessing subregister zsub2, we -// should look through through Order to find a contiguous register which -// begins with $z24 (i.e. $z24_z25_z26_z27). +// * Improve register allocation for SME multi-vector instructions where we can +// benefit from the strided- and contiguous register multi-vector tuples. // +// Here FORM_TRANSPOSED_REG_TUPLE nodes are created to improve register +// allocation where a consecutive multi-vector tuple is constructed from the +// same indices of multiple strided loads. This may still result in +// unnecessary copies between the loads and the tuple. 
Here we try to return a +// hint to assign the contiguous ZPRMulReg starting at the same register as +// the first operand of the pseudo, which should be a subregister of the first +// strided load. +// +// For example, if the first strided load has been assigned $z16_z20_z24_z28 +// and the operands of the pseudo are each accessing subregister zsub2, we +// should look through Order to find a contiguous register which +// begins with $z24 (i.e. $z24_z25_z26_z27). bool AArch64RegisterInfo::getRegAllocationHints( Register VirtReg, ArrayRef<MCPhysReg> Order, SmallVectorImpl<MCPhysReg> &Hints, const MachineFunction &MF, const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const { - auto &ST = MF.getSubtarget<AArch64Subtarget>(); + const AArch64InstrInfo *TII = + MF.getSubtarget<AArch64Subtarget>().getInstrInfo(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + + // For predicated SVE instructions where the inactive lanes are undef, + // pick a destination register that is not unique to avoid introducing + // a movprfx. + const TargetRegisterClass *RegRC = MRI.getRegClass(VirtReg); + if (AArch64::ZPRRegClass.hasSubClassEq(RegRC)) { + for (const MachineOperand &DefOp : MRI.def_operands(VirtReg)) { + const MachineInstr &Def = *DefOp.getParent(); + if (DefOp.isImplicit() || + (TII->get(Def.getOpcode()).TSFlags & AArch64::FalseLanesMask) != + AArch64::FalseLanesUndef) + continue; + + unsigned InstFlags = + TII->get(AArch64::getSVEPseudoMap(Def.getOpcode())).TSFlags; + + for (MCPhysReg R : Order) { + auto AddHintIfSuitable = [&](MCPhysReg R, const MachineOperand &MO) { + // R is a suitable register hint if there exists an operand for the + // instruction that is not yet allocated a register or if R matches + // one of the other source operands. + if (!VRM->hasPhys(MO.getReg()) || VRM->getPhys(MO.getReg()) == R) + Hints.push_back(R); + }; + + switch (InstFlags & AArch64::DestructiveInstTypeMask) { + default: + break; + case AArch64::DestructiveTernaryCommWithRev: + AddHintIfSuitable(R, Def.getOperand(2)); + AddHintIfSuitable(R, Def.getOperand(3)); + AddHintIfSuitable(R, Def.getOperand(4)); + break; + case AArch64::DestructiveBinaryComm: + case AArch64::DestructiveBinaryCommWithRev: + AddHintIfSuitable(R, Def.getOperand(2)); + AddHintIfSuitable(R, Def.getOperand(3)); + break; + case AArch64::DestructiveBinary: + case AArch64::DestructiveBinaryImm: + AddHintIfSuitable(R, Def.getOperand(2)); + break; + } + } + } + + if (Hints.size()) + return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, + MF, VRM); + } + if (!ST.hasSME() || !ST.isStreaming()) return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF, VRM); @@ -1153,8 +1214,7 @@ bool AArch64RegisterInfo::getRegAllocationHints( // FORM_TRANSPOSED_REG_TUPLE pseudo, we want to favour reducing copy // instructions over reducing the number of clobbered callee-save registers, // so we add the strided registers as a hint.
- const MachineRegisterInfo &MRI = MF.getRegInfo(); - unsigned RegID = MRI.getRegClass(VirtReg)->getID(); + unsigned RegID = RegRC->getID(); if (RegID == AArch64::ZPR2StridedOrContiguousRegClassID || RegID == AArch64::ZPR4StridedOrContiguousRegClassID) { diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index 8974965..ab4004e 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -157,7 +157,7 @@ public: bool enableMachineScheduler() const override { return true; } bool enablePostRAScheduler() const override { return usePostRAScheduler(); } bool enableSubRegLiveness() const override { return EnableSubregLiveness; } - + bool enableTerminalRule() const override { return true; } bool enableMachinePipeliner() const override; bool useDFAforSMS() const override { return false; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 54d94b1..0b61adf 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -2069,6 +2069,7 @@ def FeatureISAVersion12 : FeatureSet< FeatureMemoryAtomicFAddF32DenormalSupport, FeatureBVHDualAndBVH8Insts, FeatureWaitsBeforeSystemScopeStores, + FeatureD16Writes32BitVgpr ]>; def FeatureISAVersion12_50 : FeatureSet< @@ -2143,6 +2144,7 @@ def FeatureISAVersion12_50 : FeatureSet< FeatureSupportsXNACK, FeatureXNACK, FeatureClusters, + FeatureD16Writes32BitVgpr, ]>; def FeatureISAVersion12_51 : FeatureSet< diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 54ba2f8..dbca5af 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -5081,17 +5081,17 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { unsigned MinNumRegsRequired = DstSize / 32; const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); + bool UseAGPRForm = Info->selectAGPRFormMFMA(MinNumRegsRequired); + OpdsMapping[0] = - Info->getMinNumAGPRs() >= MinNumRegsRequired - ? getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI) - : getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI); + UseAGPRForm ? getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI) + : getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI); OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI); OpdsMapping[4] = - Info->getMinNumAGPRs() >= MinNumRegsRequired - ? getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI) - : getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); + UseAGPRForm ? getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI) + : getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); OpdsMapping[8] = getVGPROpMapping(MI.getOperand(8).getReg(), MRI, *TRI); OpdsMapping[10] = getVGPROpMapping(MI.getOperand(10).getReg(), MRI, *TRI); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp index 0ea9add..b03d50f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp @@ -261,13 +261,6 @@ AMDGPUResourceUsageAnalysisImpl::analyzeResourceUsage( const Function *Callee = getCalleeFunction(*CalleeOp); - // Avoid crashing on undefined behavior with an illegal call to a - // kernel. If a callsite's calling convention doesn't match the - // function's, it's undefined behavior. 
If the callsite calling - // convention does match, that would have errored earlier. - if (Callee && AMDGPU::isEntryFunctionCC(Callee->getCallingConv())) - report_fatal_error("invalid call to entry function"); - auto isSameFunction = [](const MachineFunction &MF, const Function *F) { return F == &MF.getFunction(); }; diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index 9fbf9e5..23ba4ad 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -2011,7 +2011,7 @@ void PreRARematStage::rematerialize() { // Rematerialize DefMI to its use block. TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg, - AMDGPU::NoSubRegister, *DefMI, *DAG.TRI); + AMDGPU::NoSubRegister, *DefMI); Remat.RematMI = &*std::prev(InsertPos); DAG.LIS->InsertMachineInstrInMaps(*Remat.RematMI); @@ -2163,8 +2163,7 @@ void PreRARematStage::finalizeGCNSchedStage() { // Re-rematerialize MI at the end of its original region. Note that it may // not be rematerialized exactly in the same position as originally within // the region, but it should not matter much. - TII->reMaterialize(*MBB, InsertPos, Reg, AMDGPU::NoSubRegister, RematMI, - *DAG.TRI); + TII->reMaterialize(*MBB, InsertPos, Reg, AMDGPU::NoSubRegister, RematMI); MachineInstr *NewMI = &*std::prev(InsertPos); DAG.LIS->InsertMachineInstrInMaps(*NewMI); diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index f377b8a..da4bd87 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -1040,6 +1040,8 @@ public: return true; } + bool enableTerminalRule() const override { return true; } + bool useAA() const override; bool enableSubRegLiveness() const override { diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp index 3e256cc..0104085 100644 --- a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp @@ -29,7 +29,7 @@ using namespace llvm; #include "R600GenInstrInfo.inc" R600InstrInfo::R600InstrInfo(const R600Subtarget &ST) - : R600GenInstrInfo(ST, -1, -1), RI(), ST(ST) {} + : R600GenInstrInfo(ST, RI, -1, -1), RI(), ST(ST) {} bool R600InstrInfo::isVector(const MachineInstr &MI) const { return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR; diff --git a/llvm/lib/Target/AMDGPU/R600Subtarget.h b/llvm/lib/Target/AMDGPU/R600Subtarget.h index 22e56b6..efd99db 100644 --- a/llvm/lib/Target/AMDGPU/R600Subtarget.h +++ b/llvm/lib/Target/AMDGPU/R600Subtarget.h @@ -126,6 +126,8 @@ public: return true; } + bool enableTerminalRule() const override { return true; } + bool enableSubRegLiveness() const override { return true; } diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 964309b..293005c 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -713,7 +713,7 @@ bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const { // Verify the register is compatible with the operand. 
if (const TargetRegisterClass *OpRC = - TII->getRegClass(MI->getDesc(), Fold.UseOpNo, TRI)) { + TII->getRegClass(MI->getDesc(), Fold.UseOpNo)) { const TargetRegisterClass *NewRC = TRI->getRegClassForReg(*MRI, New->getReg()); @@ -2394,7 +2394,7 @@ bool SIFoldOperandsImpl::tryFoldRegSequence(MachineInstr &MI) { unsigned OpIdx = Op - &UseMI->getOperand(0); const MCInstrDesc &InstDesc = UseMI->getDesc(); - const TargetRegisterClass *OpRC = TII->getRegClass(InstDesc, OpIdx, TRI); + const TargetRegisterClass *OpRC = TII->getRegClass(InstDesc, OpIdx); if (!OpRC || !TRI->isVectorSuperClass(OpRC)) return false; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 9c74c65..9c78040 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -63,7 +63,8 @@ static cl::opt<bool> Fix16BitCopies( cl::ReallyHidden); SIInstrInfo::SIInstrInfo(const GCNSubtarget &ST) - : AMDGPUGenInstrInfo(ST, AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN), + : AMDGPUGenInstrInfo(ST, RI, AMDGPU::ADJCALLSTACKUP, + AMDGPU::ADJCALLSTACKDOWN), RI(ST), ST(ST) { SchedModel.init(&ST); } @@ -1667,8 +1668,7 @@ unsigned SIInstrInfo::getVectorRegSpillSaveOpcode( void SIInstrInfo::storeRegToStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, - bool isKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, + bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags) const { MachineFunction *MF = MBB.getParent(); SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); @@ -1680,7 +1680,7 @@ void SIInstrInfo::storeRegToStackSlot( MachineMemOperand *MMO = MF->getMachineMemOperand( PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FrameIndex), FrameInfo.getObjectAlign(FrameIndex)); - unsigned SpillSize = TRI->getSpillSize(*RC); + unsigned SpillSize = RI.getSpillSize(*RC); MachineRegisterInfo &MRI = MF->getRegInfo(); if (RI.isSGPRClass(RC)) { @@ -1862,14 +1862,13 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags) const { MachineFunction *MF = MBB.getParent(); SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); MachineFrameInfo &FrameInfo = MF->getFrameInfo(); const DebugLoc &DL = MBB.findDebugLoc(MI); - unsigned SpillSize = TRI->getSpillSize(*RC); + unsigned SpillSize = RI.getSpillSize(*RC); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(*MF, FrameIndex); @@ -2518,8 +2517,8 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { void SIInstrInfo::reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg, - unsigned SubIdx, const MachineInstr &Orig, - const TargetRegisterInfo &RI) const { + unsigned SubIdx, + const MachineInstr &Orig) const { // Try shrinking the instruction to remat only the part needed for current // context. 
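Nearly every hunk in this patch follows the same mechanical pattern: the TargetRegisterInfo that used to be threaded through calls such as getRegClass, storeRegToStackSlot, and reMaterialize is now a reference handed to the instruction-info object once, at construction (hence the extra RI argument to the generated *GenInstrInfo constructors). A minimal standalone sketch of that refactor, using invented stand-in names (RegInfoT, InstrInfoBase, spillSizeOf) rather than the real LLVM declarations:

#include <cstdio>

// Stand-in for TargetRegisterInfo (hypothetical, for illustration only).
struct RegInfoT {
  unsigned spillSizeOf(unsigned RCId) const { return RCId * 4; }
};

// Stand-in for TargetInstrInfo: the register info is bound once here
// instead of being passed as a parameter on every query.
class InstrInfoBase {
  const RegInfoT &RI;
protected:
  explicit InstrInfoBase(const RegInfoT &RI) : RI(RI) {}
public:
  // Old shape: unsigned spillSize(unsigned RCId, const RegInfoT *TRI) const;
  // New shape: the TRI parameter disappears from the whole call tree.
  unsigned spillSize(unsigned RCId) const { return RI.spillSizeOf(RCId); }
};

struct TargetInstrInfoT : InstrInfoBase {
  explicit TargetInstrInfoT(const RegInfoT &RI) : InstrInfoBase(RI) {}
};

int main() {
  RegInfoT RI;
  TargetInstrInfoT TII(RI);
  std::printf("%u\n", TII.spillSize(2)); // Callers no longer pass &RI.
}

This is why call sites shrink from TII->getRegClass(MCID, 0, TRI) to TII->getRegClass(MCID, 0) throughout the AArch64, AMDGPU, ARC, and ARM hunks.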
@@ -2569,7 +2568,7 @@ void SIInstrInfo::reMaterialize(MachineBasicBlock &MBB, const MCInstrDesc &TID = get(NewOpcode); const TargetRegisterClass *NewRC = - RI.getAllocatableClass(getRegClass(TID, 0, &RI)); + RI.getAllocatableClass(getRegClass(TID, 0)); MRI.setRegClass(DestReg, NewRC); UseMO->setReg(DestReg); @@ -2599,7 +2598,7 @@ void SIInstrInfo::reMaterialize(MachineBasicBlock &MBB, break; } - TargetInstrInfo::reMaterialize(MBB, I, DestReg, SubIdx, Orig, RI); + TargetInstrInfo::reMaterialize(MBB, I, DestReg, SubIdx, Orig); } std::pair<MachineInstr*, MachineInstr*> @@ -3612,7 +3611,7 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_ACCVGPR_WRITE_B32_e64}) { const MCInstrDesc &MovDesc = get(MovOp); - const TargetRegisterClass *MovDstRC = getRegClass(MovDesc, 0, &RI); + const TargetRegisterClass *MovDstRC = getRegClass(MovDesc, 0); if (Is16Bit) { // We just need to find a correctly sized register class, so the // subregister index compatibility doesn't matter since we're statically @@ -6027,9 +6026,8 @@ SIInstrInfo::getWholeWaveFunctionSetup(MachineFunction &MF) const { // FIXME: This should not be an overridable function. All subtarget dependent // operand modifications should go through isLookupRegClassByHwMode in the // generic handling. -const TargetRegisterClass * -SIInstrInfo::getRegClass(const MCInstrDesc &TID, unsigned OpNum, - const TargetRegisterInfo *TRI) const { +const TargetRegisterClass *SIInstrInfo::getRegClass(const MCInstrDesc &TID, + unsigned OpNum) const { if (OpNum >= TID.getNumOperands()) return nullptr; const MCOperandInfo &OpInfo = TID.operands()[OpNum]; @@ -6804,7 +6802,7 @@ void SIInstrInfo::legalizeOperandsFLAT(MachineRegisterInfo &MRI, return; const TargetRegisterClass *DeclaredRC = - getRegClass(MI.getDesc(), SAddr->getOperandNo(), &RI); + getRegClass(MI.getDesc(), SAddr->getOperandNo()); Register ToSGPR = readlaneVGPRToSGPR(SAddr->getReg(), MI, MRI, DeclaredRC); SAddr->setReg(ToSGPR); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 8d693b1..c048b85 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -307,22 +307,19 @@ public: void storeRegToStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, - bool isKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, + bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; void loadRegFromStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, - int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, + int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; bool expandPostRAPseudo(MachineInstr &MI) const override; void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, - const MachineInstr &Orig, - const TargetRegisterInfo &TRI) const override; + const MachineInstr &Orig) const override; // Splits a V_MOV_B64_DPP_PSEUDO opcode into a pair of v_mov_b32_dpp // instructions. Returns a pair of generated instructions. @@ -1622,9 +1619,8 @@ public: /// Return true if this opcode should not be used by codegen. 
bool isAsmOnlyOpcode(int MCOp) const; - const TargetRegisterClass * - getRegClass(const MCInstrDesc &TID, unsigned OpNum, - const TargetRegisterInfo *TRI) const override; + const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, + unsigned OpNum) const override; void fixImplicitOperands(MachineInstr &MI) const; diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index f0d1117..fcf91e0 100644 --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -233,10 +233,11 @@ private: void copyToDestRegs(CombineInfo &CI, CombineInfo &Paired, MachineBasicBlock::iterator InsertBefore, - AMDGPU::OpName OpName, Register DestReg) const; + const DebugLoc &DL, AMDGPU::OpName OpName, + Register DestReg) const; Register copyFromSrcRegs(CombineInfo &CI, CombineInfo &Paired, MachineBasicBlock::iterator InsertBefore, - AMDGPU::OpName OpName) const; + const DebugLoc &DL, AMDGPU::OpName OpName) const; unsigned read2Opcode(unsigned EltSize) const; unsigned read2ST64Opcode(unsigned EltSize) const; @@ -1336,11 +1337,9 @@ SILoadStoreOptimizer::checkAndPrepareMerge(CombineInfo &CI, int Data1Idx = AMDGPU::getNamedOperandIdx(Write2Opc.getOpcode(), AMDGPU::OpName::data1); - const TargetRegisterClass *DataRC0 = - TII->getRegClass(Write2Opc, Data0Idx, TRI); + const TargetRegisterClass *DataRC0 = TII->getRegClass(Write2Opc, Data0Idx); - const TargetRegisterClass *DataRC1 = - TII->getRegClass(Write2Opc, Data1Idx, TRI); + const TargetRegisterClass *DataRC1 = TII->getRegClass(Write2Opc, Data1Idx); if (unsigned SubReg = Data0->getSubReg()) { DataRC0 = TRI->getMatchingSuperRegClass(MRI->getRegClass(Data0->getReg()), @@ -1367,10 +1366,9 @@ SILoadStoreOptimizer::checkAndPrepareMerge(CombineInfo &CI, // Paired. 
void SILoadStoreOptimizer::copyToDestRegs( CombineInfo &CI, CombineInfo &Paired, - MachineBasicBlock::iterator InsertBefore, AMDGPU::OpName OpName, - Register DestReg) const { + MachineBasicBlock::iterator InsertBefore, const DebugLoc &DL, + AMDGPU::OpName OpName, Register DestReg) const { MachineBasicBlock *MBB = CI.I->getParent(); - DebugLoc DL = CI.I->getDebugLoc(); auto [SubRegIdx0, SubRegIdx1] = getSubRegIdxs(CI, Paired); @@ -1398,9 +1396,9 @@ void SILoadStoreOptimizer::copyToDestRegs( Register SILoadStoreOptimizer::copyFromSrcRegs(CombineInfo &CI, CombineInfo &Paired, MachineBasicBlock::iterator InsertBefore, + const DebugLoc &DL, AMDGPU::OpName OpName) const { MachineBasicBlock *MBB = CI.I->getParent(); - DebugLoc DL = CI.I->getDebugLoc(); auto [SubRegIdx0, SubRegIdx1] = getSubRegIdxs(CI, Paired); @@ -1456,7 +1454,8 @@ SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired, const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired); Register DestReg = MRI->createVirtualRegister(SuperRC); - DebugLoc DL = CI.I->getDebugLoc(); + DebugLoc DL = + DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc()); Register BaseReg = AddrReg->getReg(); unsigned BaseSubReg = AddrReg->getSubReg(); @@ -1484,7 +1483,7 @@ SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired, .addImm(0) // gds .cloneMergedMemRefs({&*CI.I, &*Paired.I}); - copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdst, DestReg); + copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdst, DestReg); CI.I->eraseFromParent(); Paired.I->eraseFromParent(); @@ -1541,7 +1540,8 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair( (NewOffset0 != NewOffset1) && "Computed offset doesn't fit"); const MCInstrDesc &Write2Desc = TII->get(Opc); - DebugLoc DL = CI.I->getDebugLoc(); + DebugLoc DL = + DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc()); Register BaseReg = AddrReg->getReg(); unsigned BaseSubReg = AddrReg->getSubReg(); @@ -1582,7 +1582,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI, CombineInfo &Paired, MachineBasicBlock::iterator InsertBefore) { MachineBasicBlock *MBB = CI.I->getParent(); - DebugLoc DL = CI.I->getDebugLoc(); + DebugLoc DL = + DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc()); + const unsigned Opcode = getNewOpcode(CI, Paired); const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired); @@ -1607,7 +1609,7 @@ SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI, CombineInfo &Paired, MachineInstr *New = MIB.addMemOperand(combineKnownAdjacentMMOs(CI, Paired)); - copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata, DestReg); + copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata, DestReg); CI.I->eraseFromParent(); Paired.I->eraseFromParent(); @@ -1618,7 +1620,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSMemLoadImmPair( CombineInfo &CI, CombineInfo &Paired, MachineBasicBlock::iterator InsertBefore) { MachineBasicBlock *MBB = CI.I->getParent(); - DebugLoc DL = CI.I->getDebugLoc(); + DebugLoc DL = + DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc()); + const unsigned Opcode = getNewOpcode(CI, Paired); const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired); @@ -1639,7 +1643,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSMemLoadImmPair( New.addImm(MergedOffset); New.addImm(CI.CPol).addMemOperand(combineKnownAdjacentMMOs(CI, Paired)); - 
copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::sdst, DestReg); + copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::sdst, DestReg); CI.I->eraseFromParent(); Paired.I->eraseFromParent(); @@ -1650,7 +1654,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair( CombineInfo &CI, CombineInfo &Paired, MachineBasicBlock::iterator InsertBefore) { MachineBasicBlock *MBB = CI.I->getParent(); - DebugLoc DL = CI.I->getDebugLoc(); + + DebugLoc DL = + DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc()); const unsigned Opcode = getNewOpcode(CI, Paired); @@ -1680,7 +1686,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair( .addImm(0) // swz .addMemOperand(combineKnownAdjacentMMOs(CI, Paired)); - copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata, DestReg); + copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata, DestReg); CI.I->eraseFromParent(); Paired.I->eraseFromParent(); @@ -1691,7 +1697,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair( CombineInfo &CI, CombineInfo &Paired, MachineBasicBlock::iterator InsertBefore) { MachineBasicBlock *MBB = CI.I->getParent(); - DebugLoc DL = CI.I->getDebugLoc(); + + DebugLoc DL = + DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc()); const unsigned Opcode = getNewOpcode(CI, Paired); @@ -1731,7 +1739,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair( .addImm(0) // swz .addMemOperand(combineKnownAdjacentMMOs(CI, Paired)); - copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata, DestReg); + copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata, DestReg); CI.I->eraseFromParent(); Paired.I->eraseFromParent(); @@ -1742,12 +1750,13 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferStorePair( CombineInfo &CI, CombineInfo &Paired, MachineBasicBlock::iterator InsertBefore) { MachineBasicBlock *MBB = CI.I->getParent(); - DebugLoc DL = CI.I->getDebugLoc(); + DebugLoc DL = + DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc()); const unsigned Opcode = getNewOpcode(CI, Paired); Register SrcReg = - copyFromSrcRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata); + copyFromSrcRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata); auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode)) .addReg(SrcReg, RegState::Kill); @@ -1789,7 +1798,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeFlatLoadPair( CombineInfo &CI, CombineInfo &Paired, MachineBasicBlock::iterator InsertBefore) { MachineBasicBlock *MBB = CI.I->getParent(); - DebugLoc DL = CI.I->getDebugLoc(); + + DebugLoc DL = + DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc()); const unsigned Opcode = getNewOpcode(CI, Paired); @@ -1807,7 +1818,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeFlatLoadPair( .addImm(CI.CPol) .addMemOperand(combineKnownAdjacentMMOs(CI, Paired)); - copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdst, DestReg); + copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdst, DestReg); CI.I->eraseFromParent(); Paired.I->eraseFromParent(); @@ -1818,12 +1829,14 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeFlatStorePair( CombineInfo &CI, CombineInfo &Paired, MachineBasicBlock::iterator InsertBefore) { MachineBasicBlock *MBB = CI.I->getParent(); - DebugLoc DL = CI.I->getDebugLoc(); + + DebugLoc DL = + DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc()); const unsigned 
Opcode = getNewOpcode(CI, Paired); Register SrcReg = - copyFromSrcRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata); + copyFromSrcRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata); auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode)) .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr)) @@ -2094,12 +2107,13 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferStorePair( CombineInfo &CI, CombineInfo &Paired, MachineBasicBlock::iterator InsertBefore) { MachineBasicBlock *MBB = CI.I->getParent(); - DebugLoc DL = CI.I->getDebugLoc(); + DebugLoc DL = + DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc()); const unsigned Opcode = getNewOpcode(CI, Paired); Register SrcReg = - copyFromSrcRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata); + copyFromSrcRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata); auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode)) .addReg(SrcReg, RegState::Kill); diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp index 40eeeb8..cbd08f0 100644 --- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp @@ -117,27 +117,26 @@ static void insertCSRSaves(MachineBasicBlock &SaveBlock, MachineFunction &MF = *SaveBlock.getParent(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); const SIRegisterInfo *RI = ST.getRegisterInfo(); MachineBasicBlock::iterator I = SaveBlock.begin(); - if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) { + if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, RI)) { for (const CalleeSavedInfo &CS : CSI) { // Insert the spill to the stack frame. MCRegister Reg = CS.getReg(); MachineInstrSpan MIS(I, &SaveBlock); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass( + const TargetRegisterClass *RC = RI->getMinimalPhysRegClass( Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32); // If this value was already livein, we probably have a direct use of the // incoming register value, so don't kill at the spill point. This happens // since we pass some special inputs (workgroup IDs) in the callee saved // range. 
- const bool IsLiveIn = isLiveIntoMBB(Reg, SaveBlock, TRI); + const bool IsLiveIn = isLiveIntoMBB(Reg, SaveBlock, RI); TII.storeRegToStackSlot(SaveBlock, I, Reg, !IsLiveIn, CS.getFrameIdx(), - RC, TRI, Register()); + RC, Register()); if (Indexes) { assert(std::distance(MIS.begin(), I) == 1); diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp index caff354..86ca22c 100644 --- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp +++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp @@ -1346,7 +1346,7 @@ void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI, continue; unsigned I = Op.getOperandNo(); - const TargetRegisterClass *OpRC = TII->getRegClass(Desc, I, TRI); + const TargetRegisterClass *OpRC = TII->getRegClass(Desc, I); if (!OpRC || !TRI->isVSSuperClass(OpRC)) continue; diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 54f57e0..85adcab 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -513,6 +513,13 @@ defm V_CVT_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_u16_f16", defm V_CVT_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_i16_f16", VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16, fp_to_sint>; + +let HasClamp = 0, HasOMod = 0 in { +def V_TRANS_BF16_Profile : VOPProfile <[bf16, bf16, untyped, untyped]>; +def V_TRANS_BF16_t16_Profile : VOPProfile_True16 <VOP_BF16_BF16>; +def V_TRANS_BF16_fake16_Profile : VOPProfile_Fake16 <VOP_BF16_BF16>; +} + let TRANS = 1, SchedRW = [WriteTrans32] in { defm V_RCP_F16 : VOP1Inst_t16 <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>; defm V_SQRT_F16 : VOP1Inst_t16 <"v_sqrt_f16", VOP_F16_F16, any_amdgcn_sqrt>; @@ -527,14 +534,30 @@ defm V_TANH_F16 : VOP1Inst_t16 <"v_tanh_f16", VOP_F16_F16, int_amdgcn_tanh>; } let SubtargetPredicate = HasBF16TransInsts in { -defm V_TANH_BF16 : VOP1Inst_t16 <"v_tanh_bf16", VOP_BF16_BF16, int_amdgcn_tanh>; -defm V_RCP_BF16 : VOP1Inst_t16 <"v_rcp_bf16", VOP_BF16_BF16, AMDGPUrcp>; -defm V_SQRT_BF16 : VOP1Inst_t16 <"v_sqrt_bf16", VOP_BF16_BF16, any_amdgcn_sqrt>; -defm V_RSQ_BF16 : VOP1Inst_t16 <"v_rsq_bf16", VOP_BF16_BF16, AMDGPUrsq>; -defm V_LOG_BF16 : VOP1Inst_t16 <"v_log_bf16", VOP_BF16_BF16, AMDGPUlogf16>; -defm V_EXP_BF16 : VOP1Inst_t16 <"v_exp_bf16", VOP_BF16_BF16, AMDGPUexpf16>; -defm V_SIN_BF16 : VOP1Inst_t16 <"v_sin_bf16", VOP_BF16_BF16, AMDGPUsin>; -defm V_COS_BF16 : VOP1Inst_t16 <"v_cos_bf16", VOP_BF16_BF16, AMDGPUcos>; +defm V_TANH_BF16 : VOP1Inst_t16_with_profiles<"v_tanh_bf16", V_TRANS_BF16_Profile, + V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile, + int_amdgcn_tanh>; +defm V_RCP_BF16 : VOP1Inst_t16_with_profiles<"v_rcp_bf16", V_TRANS_BF16_Profile, + V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile, + AMDGPUrcp>; +defm V_SQRT_BF16 : VOP1Inst_t16_with_profiles<"v_sqrt_bf16", V_TRANS_BF16_Profile, + V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile, + any_amdgcn_sqrt>; +defm V_RSQ_BF16 : VOP1Inst_t16_with_profiles<"v_rsq_bf16", V_TRANS_BF16_Profile, + V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile, + AMDGPUrsq>; +defm V_LOG_BF16 : VOP1Inst_t16_with_profiles<"v_log_bf16", V_TRANS_BF16_Profile, + V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile, + AMDGPUlogf16>; +defm V_EXP_BF16 : VOP1Inst_t16_with_profiles<"v_exp_bf16", V_TRANS_BF16_Profile, + V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile, + AMDGPUexpf16>; +defm V_SIN_BF16 : VOP1Inst_t16_with_profiles<"v_sin_bf16", V_TRANS_BF16_Profile, + 
V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile, + AMDGPUsin>; +defm V_COS_BF16 : VOP1Inst_t16_with_profiles<"v_cos_bf16", V_TRANS_BF16_Profile, + V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile, + AMDGPUcos>; } } // End TRANS = 1, SchedRW = [WriteTrans32] defm V_FREXP_MANT_F16 : VOP1Inst_t16 <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>; diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index 8325c62..6df91a1 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -1357,8 +1357,12 @@ class VOPBinOpClampPat<SDPatternOperator node, Instruction inst, ValueType vt> : class getVOP3ModPat<VOPProfile P, SDPatternOperator node> { dag src0 = !if(P.HasOMod, - (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod), - (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)); + !if(P.HasClamp, + (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod), + (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i32:$omod)), + !if(P.HasClamp, + (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp), + (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers))); list<dag> ret3 = [(set P.DstVT:$vdst, (DivergentFragOrOp<node, P>.ret (P.Src0VT src0), diff --git a/llvm/lib/Target/ARC/ARCInstrInfo.cpp b/llvm/lib/Target/ARC/ARCInstrInfo.cpp index 05bcb35..e17ecbf 100644 --- a/llvm/lib/Target/ARC/ARCInstrInfo.cpp +++ b/llvm/lib/Target/ARC/ARCInstrInfo.cpp @@ -44,7 +44,8 @@ enum TSFlagsConstants { void ARCInstrInfo::anchor() {} ARCInstrInfo::ARCInstrInfo(const ARCSubtarget &ST) - : ARCGenInstrInfo(ST, ARC::ADJCALLSTACKDOWN, ARC::ADJCALLSTACKUP), RI(ST) {} + : ARCGenInstrInfo(ST, RI, ARC::ADJCALLSTACKDOWN, ARC::ADJCALLSTACKUP), + RI(ST) {} static bool isZeroImm(const MachineOperand &Op) { return Op.isImm() && Op.getImm() == 0; @@ -293,8 +294,7 @@ void ARCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, void ARCInstrInfo::storeRegToStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register SrcReg, - bool IsKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, + bool IsKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags) const { DebugLoc DL = MBB.findDebugLoc(I); MachineFunction &MF = *MBB.getParent(); @@ -306,11 +306,11 @@ void ARCInstrInfo::storeRegToStackSlot( MFI.getObjectAlign(FrameIndex)); assert(MMO && "Couldn't get MachineMemOperand for store to stack."); - assert(TRI->getSpillSize(*RC) == 4 && + assert(TRI.getSpillSize(*RC) == 4 && "Only support 4-byte stores to stack now."); assert(ARC::GPR32RegClass.hasSubClassEq(RC) && "Only support GPR32 stores to stack now."); - LLVM_DEBUG(dbgs() << "Created store reg=" << printReg(SrcReg, TRI) + LLVM_DEBUG(dbgs() << "Created store reg=" << printReg(SrcReg, &TRI) << " to FrameIndex=" << FrameIndex << "\n"); BuildMI(MBB, I, DL, get(ARC::ST_rs9)) .addReg(SrcReg, getKillRegState(IsKill)) @@ -323,7 +323,6 @@ void ARCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags) const { DebugLoc DL = MBB.findDebugLoc(I); @@ -335,11 +334,11 @@ void ARCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MFI.getObjectAlign(FrameIndex)); assert(MMO && "Couldn't get MachineMemOperand for store to stack."); - assert(TRI->getSpillSize(*RC) == 4 && + 
assert(TRI.getSpillSize(*RC) == 4 && "Only support 4-byte loads from stack now."); assert(ARC::GPR32RegClass.hasSubClassEq(RC) && "Only support GPR32 stores to stack now."); - LLVM_DEBUG(dbgs() << "Created load reg=" << printReg(DestReg, TRI) + LLVM_DEBUG(dbgs() << "Created load reg=" << printReg(DestReg, &TRI) << " from FrameIndex=" << FrameIndex << "\n"); BuildMI(MBB, I, DL, get(ARC::LD_rs9)) .addReg(DestReg, RegState::Define) diff --git a/llvm/lib/Target/ARC/ARCInstrInfo.h b/llvm/lib/Target/ARC/ARCInstrInfo.h index 2cf05ba..ebeaf87 100644 --- a/llvm/lib/Target/ARC/ARCInstrInfo.h +++ b/llvm/lib/Target/ARC/ARCInstrInfo.h @@ -70,14 +70,12 @@ public: void storeRegToStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, - bool IsKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, + bool IsKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; void loadRegFromStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, - int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, + int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; bool diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 22769db..6077c18 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -107,8 +107,9 @@ static const ARM_MLxEntry ARM_MLxTable[] = { { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true }, }; -ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget &STI) - : ARMGenInstrInfo(STI, ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP), +ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget &STI, + const ARMBaseRegisterInfo &TRI) + : ARMGenInstrInfo(STI, TRI, ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP), Subtarget(STI) { for (unsigned i = 0, e = std::size(ARM_MLxTable); i != e; ++i) { if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second) @@ -928,15 +929,15 @@ ARMBaseInstrInfo::describeLoadedValue(const MachineInstr &MI, return TargetInstrInfo::describeLoadedValue(MI, Reg); } -const MachineInstrBuilder & -ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg, - unsigned SubIdx, unsigned State, - const TargetRegisterInfo *TRI) const { +const MachineInstrBuilder &ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, + unsigned Reg, + unsigned SubIdx, + unsigned State) const { if (!SubIdx) return MIB.addReg(Reg, State); if (Register::isPhysicalRegister(Reg)) - return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State); + return MIB.addReg(getRegisterInfo().getSubReg(Reg, SubIdx), State); return MIB.addReg(Reg, State, SubIdx); } @@ -944,18 +945,18 @@ void ARMBaseInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register SrcReg, bool isKill, int FI, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags) const { MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = MF.getFrameInfo(); Align Alignment = MFI.getObjectAlign(FI); + const ARMBaseRegisterInfo &TRI = getRegisterInfo(); MachineMemOperand *MMO = MF.getMachineMemOperand( MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore, MFI.getObjectSize(FI), Alignment); - switch (TRI->getSpillSize(*RC)) { + switch (TRI.getSpillSize(*RC)) { case 2: if 
(ARM::HPRRegClass.hasSubClassEq(RC)) { BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRH)) @@ -1010,8 +1011,8 @@ void ARMBaseInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { if (Subtarget.hasV5TEOps()) { MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STRD)); - AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); - AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); + AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill)); + AddDReg(MIB, SrcReg, ARM::gsub_1, 0); MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO) .add(predOps(ARMCC::AL)); } else { @@ -1021,8 +1022,8 @@ void ARMBaseInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, .addFrameIndex(FI) .addMemOperand(MMO) .add(predOps(ARMCC::AL)); - AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); - AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); + AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill)); + AddDReg(MIB, SrcReg, ARM::gsub_1, 0); } } else llvm_unreachable("Unknown reg class!"); @@ -1072,9 +1073,9 @@ void ARMBaseInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, .addFrameIndex(FI) .add(predOps(ARMCC::AL)) .addMemOperand(MMO); - MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); - MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); - AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill)); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0); + AddDReg(MIB, SrcReg, ARM::dsub_2, 0); } } else llvm_unreachable("Unknown reg class!"); @@ -1104,10 +1105,10 @@ void ARMBaseInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, .addFrameIndex(FI) .add(predOps(ARMCC::AL)) .addMemOperand(MMO); - MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); - MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); - MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); - AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill)); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0); + AddDReg(MIB, SrcReg, ARM::dsub_3, 0); } } else llvm_unreachable("Unknown reg class!"); @@ -1124,14 +1125,14 @@ void ARMBaseInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, .addFrameIndex(FI) .add(predOps(ARMCC::AL)) .addMemOperand(MMO); - MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); - MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); - MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); - MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI); - MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI); - MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI); - MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI); - AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill)); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0); + AddDReg(MIB, SrcReg, ARM::dsub_7, 0); } else llvm_unreachable("Unknown reg class!"); break; @@ -1207,10 +1208,12 @@ Register ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI, return false; } -void ARMBaseInstrInfo::loadRegFromStackSlot( - MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg, - int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, - Register VReg, 
-                                            MachineInstr::MIFlag Flags) const {
+void ARMBaseInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                            MachineBasicBlock::iterator I,
+                                            Register DestReg, int FI,
+                                            const TargetRegisterClass *RC,
+                                            Register VReg,
+                                            MachineInstr::MIFlag Flags) const {
   DebugLoc DL;
   if (I != MBB.end())
     DL = I->getDebugLoc();
   MachineFunction &MF = *MBB.getParent();
@@ -1220,7 +1223,8 @@ void ARMBaseInstrInfo::loadRegFromStackSlot(
       MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
       MFI.getObjectSize(FI), Alignment);

-  switch (TRI->getSpillSize(*RC)) {
+  const ARMBaseRegisterInfo &TRI = getRegisterInfo();
+  switch (TRI.getSpillSize(*RC)) {
   case 2:
     if (ARM::HPRRegClass.hasSubClassEq(RC)) {
       BuildMI(MBB, I, DL, get(ARM::VLDRH), DestReg)
@@ -1271,8 +1275,8 @@ void ARMBaseInstrInfo::loadRegFromStackSlot(

     if (Subtarget.hasV5TEOps()) {
       MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
-      AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
-      AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
+      AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead);
+      AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead);
       MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
     } else {
@@ -1282,8 +1286,8 @@ void ARMBaseInstrInfo::loadRegFromStackSlot(
           .addFrameIndex(FI)
           .addMemOperand(MMO)
           .add(predOps(ARMCC::AL));
-      MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead);
+      MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead);
     }

     if (DestReg.isPhysical())
@@ -1329,9 +1333,9 @@ void ARMBaseInstrInfo::loadRegFromStackSlot(
           .addFrameIndex(FI)
           .addMemOperand(MMO)
           .add(predOps(ARMCC::AL));
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead);
       if (DestReg.isPhysical())
         MIB.addReg(DestReg, RegState::ImplicitDefine);
     }
@@ -1358,10 +1362,10 @@ void ARMBaseInstrInfo::loadRegFromStackSlot(
           .addFrameIndex(FI)
           .add(predOps(ARMCC::AL))
           .addMemOperand(MMO);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead);
       if (DestReg.isPhysical())
         MIB.addReg(DestReg, RegState::ImplicitDefine);
     }
@@ -1379,14 +1383,14 @@ void ARMBaseInstrInfo::loadRegFromStackSlot(
           .addFrameIndex(FI)
           .add(predOps(ARMCC::AL))
           .addMemOperand(MMO);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead);
       if (DestReg.isPhysical())
         MIB.addReg(DestReg, RegState::ImplicitDefine);
     } else
@@ -1652,8 +1656,7 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
 void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator I,
                                      Register DestReg, unsigned SubIdx,
-                                     const MachineInstr &Orig,
-                                     const TargetRegisterInfo &TRI) const {
+                                     const MachineInstr &Orig) const {
   unsigned Opcode = Orig.getOpcode();
   switch (Opcode) {
   default: {
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index 2869e7f..04e2ab0 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -44,7 +44,8 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo {

 protected:
   // Can be only subclassed.
-  explicit ARMBaseInstrInfo(const ARMSubtarget &STI);
+  explicit ARMBaseInstrInfo(const ARMSubtarget &STI,
+                            const ARMBaseRegisterInfo &TRI);

   void expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
                                 unsigned LoadImmOpc, unsigned LoadOpc) const;
@@ -125,7 +126,11 @@ public:
   // if there is not such an opcode.
   virtual unsigned getUnindexedOpcode(unsigned Opc) const = 0;

-  virtual const ARMBaseRegisterInfo &getRegisterInfo() const = 0;
+  const ARMBaseRegisterInfo &getRegisterInfo() const {
+    return static_cast<const ARMBaseRegisterInfo &>(
+        TargetInstrInfo::getRegisterInfo());
+  }
+
   const ARMSubtarget &getSubtarget() const { return Subtarget; }

   ScheduleHazardRecognizer *
@@ -211,14 +216,13 @@ public:

   void storeRegToStackSlot(
       MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
-      bool isKill, int FrameIndex, const TargetRegisterClass *RC,
-      const TargetRegisterInfo *TRI, Register VReg,
+      bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg,
       MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;

   void loadRegFromStackSlot(
       MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg,
       int FrameIndex, const TargetRegisterClass *RC,
-      const TargetRegisterInfo *TRI, Register VReg,
+      Register VReg,
       MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;

   bool expandPostRAPseudo(MachineInstr &MI) const override;
@@ -227,16 +231,14 @@ public:

   void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                      Register DestReg, unsigned SubIdx,
-                     const MachineInstr &Orig,
-                     const TargetRegisterInfo &TRI) const override;
+                     const MachineInstr &Orig) const override;

   MachineInstr &
   duplicate(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
             const MachineInstr &Orig) const override;

   const MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
-                                     unsigned SubIdx, unsigned State,
-                                     const TargetRegisterInfo *TRI) const;
+                                     unsigned SubIdx, unsigned State) const;

   bool produceSameValue(const MachineInstr &MI0, const MachineInstr &MI1,
                         const MachineRegisterInfo *MRI) const override;
diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index ce1cdb3..80921ce 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -708,7 +708,7 @@ ARMBaseRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
   const MCInstrDesc &MCID = TII.get(ADDriOpc);
   Register BaseReg = MRI.createVirtualRegister(&ARM::GPRRegClass);
-  MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this));
+  MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0));

   MachineInstrBuilder MIB = BuildMI(*MBB, Ins, DL, MCID, BaseReg)
     .addFrameIndex(FrameIdx).addImm(Offset);
@@ -881,8 +881,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   Register PredReg = (PIdx == -1) ? Register() : MI.getOperand(PIdx+1).getReg();
   const MCInstrDesc &MCID = MI.getDesc();
-  const TargetRegisterClass *RegClass =
-      TII.getRegClass(MCID, FIOperandNum, this);
+  const TargetRegisterClass *RegClass = TII.getRegClass(MCID, FIOperandNum);

   if (Offset == 0 && (FrameReg.isVirtual() || RegClass->contains(FrameReg)))
     // Must be addrmode4/6.
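A note on the pattern in the ARM hunks above: getRegisterInfo() is no longer a pure-virtual hook that every subclass must override. The generated base class now receives the register info once, at construction, and the target recovers the concrete type on the way out with a static_cast, so helpers such as AddDReg can drop their TargetRegisterInfo parameter entirely. A minimal standalone sketch of that shape, using illustrative stand-in names rather than the real LLVM classes:

    #include <iostream>

    // Stand-ins for TargetRegisterInfo/ARMBaseRegisterInfo, for illustration.
    struct TargetRegisterInfoLike {
      virtual ~TargetRegisterInfoLike() = default;
      virtual unsigned spillSize() const { return 4; }
    };

    struct ARMRegisterInfoLike : TargetRegisterInfoLike {
      unsigned spillSize() const override { return 8; }
    };

    // The generated base takes the register info at construction...
    class TargetInstrInfoLike {
      const TargetRegisterInfoLike &TRI;

    protected:
      explicit TargetInstrInfoLike(const TargetRegisterInfoLike &RI) : TRI(RI) {}

    public:
      const TargetRegisterInfoLike &getRegisterInfo() const { return TRI; }
    };

    // ...and the target downcasts instead of overriding a pure virtual. The
    // cast is safe because the constructor only ever accepted the derived type.
    class ARMInstrInfoLike : public TargetInstrInfoLike {
    public:
      explicit ARMInstrInfoLike(const ARMRegisterInfoLike &RI)
          : TargetInstrInfoLike(RI) {}
      const ARMRegisterInfoLike &getRegisterInfo() const {
        return static_cast<const ARMRegisterInfoLike &>(
            TargetInstrInfoLike::getRegisterInfo());
      }
      // Helpers no longer need a TRI parameter; they fetch it themselves.
      unsigned spillSizeForClass() const { return getRegisterInfo().spillSize(); }
    };

    int main() {
      ARMRegisterInfoLike RI;
      ARMInstrInfoLike TII(RI);
      std::cout << TII.spillSizeForClass() << "\n"; // prints 8
    }
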
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 138981a..21a1135 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -2342,7 +2342,6 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
   const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   const ARMBaseInstrInfo &TII =
       *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
-  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
   unsigned Limit = (1 << 12) - 1;
   for (auto &MBB : MF) {
     for (auto &MI : MBB) {
@@ -2364,7 +2363,7 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
           break;

         const MCInstrDesc &MCID = MI.getDesc();
-        const TargetRegisterClass *RegClass = TII.getRegClass(MCID, i, TRI);
+        const TargetRegisterClass *RegClass = TII.getRegClass(MCID, i);
         if (RegClass && !RegClass->contains(ARM::SP))
           HasNonSPFrameIndex = true;
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/llvm/lib/Target/ARM/ARMInstrInfo.cpp
index c684de7..f370547 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.cpp
@@ -25,7 +25,8 @@
 #include "llvm/MC/MCInst.h"
 using namespace llvm;

-ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) : ARMBaseInstrInfo(STI) {}
+ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
+    : ARMBaseInstrInfo(STI, RI) {}

 /// Return the noop instruction to use for a noop.
 MCInst ARMInstrInfo::getNop() const {
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.h b/llvm/lib/Target/ARM/ARMInstrInfo.h
index 178d7a2..9feaf14 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.h
@@ -35,7 +35,7 @@ public:
   /// such, whenever a client has an instance of instruction info, it should
   /// always be able to get register info as well (through this method).
   ///
-  const ARMRegisterInfo &getRegisterInfo() const override { return RI; }
+  const ARMRegisterInfo &getRegisterInfo() const { return RI; }

 private:
   void expandLoadStackGuard(MachineBasicBlock::iterator MI) const override;
diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index cd4299b..db37b76 100644
--- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -2424,7 +2424,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(
         Ops.pop_back();

         const MCInstrDesc &MCID = TII->get(NewOpc);
-        const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
+        const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0);
         MRI->constrainRegClass(FirstReg, TRC);
         MRI->constrainRegClass(SecondReg, TRC);

@@ -3014,7 +3014,7 @@ static void AdjustBaseAndOffset(MachineInstr *MI, Register NewBaseReg,
   MachineFunction *MF = MI->getMF();
   MachineRegisterInfo &MRI = MF->getRegInfo();
   const MCInstrDesc &MCID = TII->get(MI->getOpcode());
-  const TargetRegisterClass *TRC = TII->getRegClass(MCID, BaseOp, TRI);
+  const TargetRegisterClass *TRC = TII->getRegClass(MCID, BaseOp);
   MRI.constrainRegClass(NewBaseReg, TRC);

   int OldOffset = MI->getOperand(BaseOp + 1).getImm();
@@ -3071,10 +3071,10 @@ static MachineInstr *createPostIncLoadStore(MachineInstr *MI, int Offset,
   const MCInstrDesc &MCID = TII->get(NewOpcode);

   // Constrain the def register class
-  const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
+  const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0);
   MRI.constrainRegClass(NewReg, TRC);

   // And do the same for the base operand
-  TRC = TII->getRegClass(MCID, 2, TRI);
+  TRC = TII->getRegClass(MCID, 2);
   MRI.constrainRegClass(MI->getOperand(1).getReg(), TRC);

   unsigned AddrMode = (MCID.TSFlags & ARMII::AddrModeMask);
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index 4a0883c..34baa31 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -377,6 +377,7 @@ public:
   bool isRWPI() const;

   bool useMachineScheduler() const { return UseMISched; }
+  bool enableTerminalRule() const override { return true; }
   bool useMachinePipeliner() const { return UseMIPipeliner; }
   bool hasMinSize() const { return OptMinSize; }
   bool isThumb1Only() const { return isThumb() && !hasThumb2(); }
diff --git a/llvm/lib/Target/ARM/MLxExpansionPass.cpp b/llvm/lib/Target/ARM/MLxExpansionPass.cpp
index 8e1bf1d..eb237b4 100644
--- a/llvm/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/llvm/lib/Target/ARM/MLxExpansionPass.cpp
@@ -283,7 +283,7 @@ MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
   const MCInstrDesc &MCID1 = TII->get(MulOpc);
   const MCInstrDesc &MCID2 = TII->get(AddSubOpc);

-  Register TmpReg = MRI->createVirtualRegister(TII->getRegClass(MCID1, 0, TRI));
+  Register TmpReg = MRI->createVirtualRegister(TII->getRegClass(MCID1, 0));

   MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID1, TmpReg)
     .addReg(Src1Reg, getKillRegState(Src1Kill))
diff --git a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
index 4b8c2fd..01f588f 100644
--- a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -24,7 +24,7 @@
 using namespace llvm;

 Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI)
-    : ARMBaseInstrInfo(STI), RI(STI) {}
+    : ARMBaseInstrInfo(STI, RI), RI(STI) {}

 /// Return the noop instruction to use for a noop.
 MCInst Thumb1InstrInfo::getNop() const {
@@ -116,7 +116,6 @@ void Thumb1InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator I,
                                           Register SrcReg, bool isKill, int FI,
                                           const TargetRegisterClass *RC,
-                                          const TargetRegisterInfo *TRI,
                                           Register VReg,
                                           MachineInstr::MIFlag Flags) const {
   assert((RC == &ARM::tGPRRegClass ||
@@ -142,10 +141,12 @@ void Thumb1InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
   }
 }

-void Thumb1InstrInfo::loadRegFromStackSlot(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg,
-    int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
-    Register VReg, MachineInstr::MIFlag Flags) const {
+void Thumb1InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                           MachineBasicBlock::iterator I,
+                                           Register DestReg, int FI,
+                                           const TargetRegisterClass *RC,
+                                           Register VReg,
+                                           MachineInstr::MIFlag Flags) const {
   assert((RC->hasSuperClassEq(&ARM::tGPRRegClass) ||
           (DestReg.isPhysical() && isARMLowRegister(DestReg))) &&
          "Unknown regclass!");
diff --git a/llvm/lib/Target/ARM/Thumb1InstrInfo.h b/llvm/lib/Target/ARM/Thumb1InstrInfo.h
index 68b326c..289a30a 100644
--- a/llvm/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/llvm/lib/Target/ARM/Thumb1InstrInfo.h
@@ -35,7 +35,7 @@ public:
   /// such, whenever a client has an instance of instruction info, it should
   /// always be able to get register info as well (through this method).
   ///
-  const ThumbRegisterInfo &getRegisterInfo() const override { return RI; }
+  const ThumbRegisterInfo &getRegisterInfo() const { return RI; }

   void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                    const DebugLoc &DL, Register DestReg, Register SrcReg,
@@ -43,14 +43,13 @@ public:
                    bool RenamableSrc = false) const override;
   void storeRegToStackSlot(
       MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
-      bool isKill, int FrameIndex, const TargetRegisterClass *RC,
-      const TargetRegisterInfo *TRI, Register VReg,
+      bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg,
       MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;

   void loadRegFromStackSlot(
       MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg,
       int FrameIndex, const TargetRegisterClass *RC,
-      const TargetRegisterInfo *TRI, Register VReg,
+      Register VReg,
       MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;

   bool canCopyGluedNodeDuringSchedule(SDNode *N) const override;
diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index f5653d4..efb92c9 100644
--- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -46,7 +46,7 @@ PreferNoCSEL("prefer-no-csel", cl::Hidden,
              cl::init(false));

 Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
-    : ARMBaseInstrInfo(STI), RI(STI) {}
+    : ARMBaseInstrInfo(STI, RI), RI(STI) {}

 /// Return the noop instruction to use for a noop.
 MCInst Thumb2InstrInfo::getNop() const {
@@ -165,7 +165,6 @@ void Thumb2InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator I,
                                           Register SrcReg, bool isKill, int FI,
                                           const TargetRegisterClass *RC,
-                                          const TargetRegisterInfo *TRI,
                                           Register VReg,
                                           MachineInstr::MIFlag Flags) const {
   DebugLoc DL;
@@ -197,20 +196,22 @@ void Thumb2InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
     }

     MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2STRDi8));
-    AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
-    AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
+    AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill));
+    AddDReg(MIB, SrcReg, ARM::gsub_1, 0);
     MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO).add(predOps(ARMCC::AL));
     return;
   }

-  ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC, TRI,
+  ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC,
                                         Register());
 }

-void Thumb2InstrInfo::loadRegFromStackSlot(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg,
-    int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
-    Register VReg, MachineInstr::MIFlag Flags) const {
+void Thumb2InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                           MachineBasicBlock::iterator I,
+                                           Register DestReg, int FI,
+                                           const TargetRegisterClass *RC,
+                                           Register VReg,
+                                           MachineInstr::MIFlag Flags) const {
   MachineFunction &MF = *MBB.getParent();
   MachineFrameInfo &MFI = MF.getFrameInfo();
   MachineMemOperand *MMO = MF.getMachineMemOperand(
@@ -238,8 +239,8 @@ void Thumb2InstrInfo::loadRegFromStackSlot(
     }

     MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2LDRDi8));
-    AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
-    AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
+    AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead);
+    AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead);
     MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO).add(predOps(ARMCC::AL));

     if (DestReg.isPhysical())
@@ -247,8 +248,7 @@ void Thumb2InstrInfo::loadRegFromStackSlot(
     return;
   }

-  ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI,
-                                         Register());
+  ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, Register());
 }

 void Thumb2InstrInfo::expandLoadStackGuard(
@@ -564,7 +564,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
   bool isSub = false;

   MachineFunction &MF = *MI.getParent()->getParent();
-  const TargetRegisterClass *RegClass = TII.getRegClass(Desc, FrameRegIdx, TRI);
+  const TargetRegisterClass *RegClass = TII.getRegClass(Desc, FrameRegIdx);

   // Memory operands in inline assembly always use AddrModeT2_i12.
   if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR)
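The Thumb2 spill helpers above keep their structure after the signature change: handle the GPRPair case with t2STRDi8/t2LDRDi8 locally, then fall back to the ARMBaseInstrInfo implementation, now without threading a TargetRegisterInfo pointer through every level. A rough standalone model of that special-case-then-delegate dispatch (stand-in types, not the real API):

    #include <iostream>
    #include <string>

    struct BaseSpiller {
      virtual ~BaseSpiller() = default;
      // Note: no register-info parameter anywhere in the chain.
      virtual void storeRegToStackSlot(const std::string &RC, int FI) const {
        std::cout << "base store " << RC << " to slot " << FI << "\n";
      }
    };

    struct Thumb2Spiller : BaseSpiller {
      void storeRegToStackSlot(const std::string &RC, int FI) const override {
        if (RC == "GPRPair") { // special case: paired store, like t2STRDi8
          std::cout << "t2STRDi8-style pair -> slot " << FI << "\n";
          return;
        }
        BaseSpiller::storeRegToStackSlot(RC, FI); // everything else delegates
      }
    };

    int main() {
      Thumb2Spiller S;
      S.storeRegToStackSlot("GPRPair", 0);
      S.storeRegToStackSlot("QPR", 1);
    }
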
diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/llvm/lib/Target/ARM/Thumb2InstrInfo.h
index 1b0bf2d..1e11cb3 100644
--- a/llvm/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.h
@@ -44,21 +44,20 @@ public:
   void storeRegToStackSlot(
       MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
-      bool isKill, int FrameIndex, const TargetRegisterClass *RC,
-      const TargetRegisterInfo *TRI, Register VReg,
+      bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg,
       MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;

   void loadRegFromStackSlot(
       MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg,
       int FrameIndex, const TargetRegisterClass *RC,
-      const TargetRegisterInfo *TRI, Register VReg,
+      Register VReg,
       MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;

   /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info.  As
   /// such, whenever a client has an instance of instruction info, it should
   /// always be able to get register info as well (through this method).
   ///
-  const ThumbRegisterInfo &getRegisterInfo() const override { return RI; }
+  const ThumbRegisterInfo &getRegisterInfo() const { return RI; }

   MachineInstr *optimizeSelect(MachineInstr &MI,
                                SmallPtrSetImpl<MachineInstr *> &SeenMIs,
diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.cpp b/llvm/lib/Target/AVR/AVRInstrInfo.cpp
index ce99085..6c37ba1 100644
--- a/llvm/lib/Target/AVR/AVRInstrInfo.cpp
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.cpp
@@ -30,8 +30,8 @@ namespace llvm {

 AVRInstrInfo::AVRInstrInfo(const AVRSubtarget &STI)
-    : AVRGenInstrInfo(STI, AVR::ADJCALLSTACKDOWN, AVR::ADJCALLSTACKUP), RI(),
-      STI(STI) {}
+    : AVRGenInstrInfo(STI, RI, AVR::ADJCALLSTACKDOWN, AVR::ADJCALLSTACKUP),
+      RI(), STI(STI) {}

 void AVRInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MI,
@@ -126,8 +126,7 @@ Register AVRInstrInfo::isStoreToStackSlot(const MachineInstr &MI,

 void AVRInstrInfo::storeRegToStackSlot(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg,
-    bool isKill, int FrameIndex, const TargetRegisterClass *RC,
-    const TargetRegisterInfo *TRI, Register VReg,
+    bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg,
     MachineInstr::MIFlag Flags) const {
   MachineFunction &MF = *MBB.getParent();
   AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();
@@ -142,9 +141,9 @@ void AVRInstrInfo::storeRegToStackSlot(
       MFI.getObjectAlign(FrameIndex));

   unsigned Opcode = 0;
-  if (TRI->isTypeLegalForClass(*RC, MVT::i8)) {
+  if (RI.isTypeLegalForClass(*RC, MVT::i8)) {
     Opcode = AVR::STDPtrQRr;
-  } else if (TRI->isTypeLegalForClass(*RC, MVT::i16)) {
+  } else if (RI.isTypeLegalForClass(*RC, MVT::i16)) {
     Opcode = AVR::STDWPtrQRr;
   } else {
     llvm_unreachable("Cannot store this register into a stack slot!");
@@ -161,7 +160,6 @@ void AVRInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MI,
                                         Register DestReg, int FrameIndex,
                                         const TargetRegisterClass *RC,
-                                        const TargetRegisterInfo *TRI,
                                         Register VReg,
                                         MachineInstr::MIFlag Flags) const {
   MachineFunction &MF = *MBB.getParent();
@@ -173,9 +171,9 @@ void AVRInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
       MFI.getObjectAlign(FrameIndex));

   unsigned Opcode = 0;
-  if (TRI->isTypeLegalForClass(*RC, MVT::i8)) {
+  if (TRI.isTypeLegalForClass(*RC, MVT::i8)) {
     Opcode = AVR::LDDRdPtrQ;
-  } else if (TRI->isTypeLegalForClass(*RC, MVT::i16)) {
+  } else if (TRI.isTypeLegalForClass(*RC, MVT::i16)) {
     // Opcode = AVR::LDDWRdPtrQ;
     //: FIXME: remove this once PR13375 gets fixed
     Opcode = AVR::LDDWRdYQ;
diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.h b/llvm/lib/Target/AVR/AVRInstrInfo.h
index 759aea2..4db535a 100644
--- a/llvm/lib/Target/AVR/AVRInstrInfo.h
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.h
@@ -79,13 +79,11 @@ public:
                    bool RenamableSrc = false) const override;
   void storeRegToStackSlot(
       MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg,
-      bool isKill, int FrameIndex, const TargetRegisterClass *RC,
-      const TargetRegisterInfo *TRI, Register VReg,
+      bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg,
       MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
   void loadRegFromStackSlot(
       MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg,
-      int FrameIndex, const TargetRegisterClass *RC,
-      const TargetRegisterInfo *TRI, Register VReg,
+      int FrameIndex, const TargetRegisterClass *RC, Register VReg,
       MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
   Register isLoadFromStackSlot(const MachineInstr &MI,
                                int &FrameIndex) const override;
diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.cpp b/llvm/lib/Target/BPF/BPFInstrInfo.cpp
index 409f8b4..095e249 100644
--- a/llvm/lib/Target/BPF/BPFInstrInfo.cpp
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.cpp
@@ -27,7 +27,7 @@ using namespace llvm;

 BPFInstrInfo::BPFInstrInfo(const BPFSubtarget &STI)
-    : BPFGenInstrInfo(STI, BPF::ADJCALLSTACKDOWN, BPF::ADJCALLSTACKUP) {}
+    : BPFGenInstrInfo(STI, RI, BPF::ADJCALLSTACKDOWN, BPF::ADJCALLSTACKUP) {}

 void BPFInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator I,
@@ -127,7 +127,6 @@ void BPFInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator I,
                                        Register SrcReg, bool IsKill, int FI,
                                        const TargetRegisterClass *RC,
-                                       const TargetRegisterInfo *TRI,
                                        Register VReg,
                                        MachineInstr::MIFlag Flags) const {
   DebugLoc DL;
@@ -148,10 +147,12 @@ void BPFInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
     llvm_unreachable("Can't store this register to stack slot");
 }

-void BPFInstrInfo::loadRegFromStackSlot(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg,
-    int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
-    Register VReg, MachineInstr::MIFlag Flags) const {
+void BPFInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                        MachineBasicBlock::iterator I,
+                                        Register DestReg, int FI,
+                                        const TargetRegisterClass *RC,
+                                        Register VReg,
+                                        MachineInstr::MIFlag Flags) const {
   DebugLoc DL;
   if (I != MBB.end())
     DL = I->getDebugLoc();
diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.h b/llvm/lib/Target/BPF/BPFInstrInfo.h
index 911e880..d3ef9bc 100644
--- a/llvm/lib/Target/BPF/BPFInstrInfo.h
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.h
@@ -39,14 +39,13 @@ public:

   void storeRegToStackSlot(
       MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
-      bool isKill, int FrameIndex, const TargetRegisterClass *RC,
-      const TargetRegisterInfo *TRI, Register VReg,
+      bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg,
       MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;

   void loadRegFromStackSlot(
       MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg,
       int FrameIndex, const TargetRegisterClass *RC,
-      const TargetRegisterInfo *TRI, Register VReg,
+      Register VReg,
       MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;

   bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                      MachineBasicBlock *&FBB,
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp b/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp
index 619a797..34a7de8 100644
--- a/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp
@@ -25,7 +25,7 @@ using namespace llvm;
 #include "CSKYGenInstrInfo.inc"

 CSKYInstrInfo::CSKYInstrInfo(const CSKYSubtarget &STI)
-    : CSKYGenInstrInfo(STI, CSKY::ADJCALLSTACKDOWN, CSKY::ADJCALLSTACKUP),
+    : CSKYGenInstrInfo(STI, RI, CSKY::ADJCALLSTACKDOWN, CSKY::ADJCALLSTACKUP),
       STI(STI) {
   v2sf = STI.hasFPUv2SingleFloat();
   v2df = STI.hasFPUv2DoubleFloat();
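In the AVR hunks above, the spill opcode is chosen purely from which machine type the register class can legally hold; with the TRI parameter gone, that query now goes through the member register info. A small self-contained sketch of the selection logic (the enumerators echo the AVR opcode names, but this is a model, not the real API):

    #include <iostream>
    #include <stdexcept>

    // Picks the store opcode from the widest legal scalar type of the class,
    // mirroring the i8/i16 branches in AVRInstrInfo::storeRegToStackSlot.
    enum class AVRStoreOpc { STDPtrQRr /*8-bit*/, STDWPtrQRr /*16-bit*/ };

    AVRStoreOpc selectStoreOpcode(unsigned RegClassBits) {
      switch (RegClassBits) {
      case 8:
        return AVRStoreOpc::STDPtrQRr;
      case 16:
        return AVRStoreOpc::STDWPtrQRr;
      default:
        throw std::logic_error("cannot store this class to a stack slot");
      }
    }

    int main() {
      std::cout << (selectStoreOpcode(16) == AVRStoreOpc::STDWPtrQRr) << "\n"; // 1
    }
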
diff --git a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
index 6579d34..057d87b 100644
--- a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
+++ b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
@@ -10,6 +10,7 @@
 #include "DirectX.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/Analysis/DXILResource.h"
+#include "llvm/Frontend/HLSL/HLSLResource.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/IRBuilder.h"
@@ -20,6 +21,7 @@
 #include "llvm/IR/IntrinsicsDirectX.h"
 #include "llvm/IR/User.h"
 #include "llvm/InitializePasses.h"
+#include "llvm/Support/FormatVariadic.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"

 #define DEBUG_TYPE "dxil-resource-access"
@@ -44,16 +46,28 @@ static Value *calculateGEPOffset(GetElementPtrInst *GEP, Value *PrevOffset,
   APInt ConstantOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
   if (GEP->accumulateConstantOffset(DL, ConstantOffset)) {
     APInt Scaled = ConstantOffset.udiv(ScalarSize);
-    return ConstantInt::get(Type::getInt32Ty(GEP->getContext()), Scaled);
+    return ConstantInt::get(DL.getIndexType(GEP->getType()), Scaled);
   }

-  auto IndexIt = GEP->idx_begin();
-  assert(cast<ConstantInt>(IndexIt)->getZExtValue() == 0 &&
-         "GEP is not indexing through pointer");
-  ++IndexIt;
-  Value *Offset = *IndexIt;
-  assert(++IndexIt == GEP->idx_end() && "Too many indices in GEP");
-  return Offset;
+  unsigned NumIndices = GEP->getNumIndices();
+
+  // If we have a single index we're indexing into a top level array. This
+  // generally only happens with cbuffers.
+  if (NumIndices == 1)
+    return *GEP->idx_begin();
+
+  // If we have two indices, this should be a simple access through a pointer.
+  if (NumIndices == 2) {
+    auto IndexIt = GEP->idx_begin();
+    assert(cast<ConstantInt>(IndexIt)->getZExtValue() == 0 &&
+           "GEP is not indexing through pointer");
+    ++IndexIt;
+    Value *Offset = *IndexIt;
+    assert(++IndexIt == GEP->idx_end() && "Too many indices in GEP");
+    return Offset;
+  }
+
+  llvm_unreachable("Unhandled GEP structure for resource access");
 }

 static void createTypedBufferStore(IntrinsicInst *II, StoreInst *SI,
@@ -171,6 +185,127 @@ static void createRawLoad(IntrinsicInst *II, LoadInst *LI, Value *Offset) {
   LI->replaceAllUsesWith(V);
 }

+namespace {
+/// Helper for building a `load.cbufferrow` intrinsic given a simple type.
+struct CBufferRowIntrin {
+  Intrinsic::ID IID;
+  Type *RetTy;
+  unsigned int EltSize;
+  unsigned int NumElts;
+
+  CBufferRowIntrin(const DataLayout &DL, Type *Ty) {
+    assert(Ty == Ty->getScalarType() && "Expected scalar type");
+
+    switch (DL.getTypeSizeInBits(Ty)) {
+    case 16:
+      IID = Intrinsic::dx_resource_load_cbufferrow_8;
+      RetTy = StructType::get(Ty, Ty, Ty, Ty, Ty, Ty, Ty, Ty);
+      EltSize = 2;
+      NumElts = 8;
+      break;
+    case 32:
+      IID = Intrinsic::dx_resource_load_cbufferrow_4;
+      RetTy = StructType::get(Ty, Ty, Ty, Ty);
+      EltSize = 4;
+      NumElts = 4;
+      break;
+    case 64:
+      IID = Intrinsic::dx_resource_load_cbufferrow_2;
+      RetTy = StructType::get(Ty, Ty);
+      EltSize = 8;
+      NumElts = 2;
+      break;
+    default:
+      llvm_unreachable("Only 16, 32, and 64 bit types supported");
+    }
+  }
+};
+} // namespace
+
+static void createCBufferLoad(IntrinsicInst *II, LoadInst *LI, Value *Offset,
+                              dxil::ResourceTypeInfo &RTI) {
+  const DataLayout &DL = LI->getDataLayout();
+
+  Type *Ty = LI->getType();
+  assert(!isa<StructType>(Ty) && "Structs not handled yet");
+  CBufferRowIntrin Intrin(DL, Ty->getScalarType());
+
+  StringRef Name = LI->getName();
+  Value *Handle = II->getOperand(0);
+
+  IRBuilder<> Builder(LI);
+
+  ConstantInt *GlobalOffset = dyn_cast<ConstantInt>(II->getOperand(1));
+  assert(GlobalOffset && "CBuffer getpointer index must be constant");
+
+  unsigned int FixedOffset = GlobalOffset->getZExtValue();
+  // If we have a further constant offset we can just fold it in to the fixed
+  // offset.
+  if (auto *ConstOffset = dyn_cast_if_present<ConstantInt>(Offset)) {
+    FixedOffset += ConstOffset->getZExtValue();
+    Offset = nullptr;
+  }
+
+  Value *CurrentRow = ConstantInt::get(
+      Builder.getInt32Ty(), FixedOffset / hlsl::CBufferRowSizeInBytes);
+  unsigned int CurrentIndex =
+      (FixedOffset % hlsl::CBufferRowSizeInBytes) / Intrin.EltSize;
+
+  assert(!(CurrentIndex && Offset) &&
+         "Dynamic indexing into elements of cbuffer rows is not supported");
+  // At this point if we have a non-constant offset it has to be an array
+  // offset, so we can assume that it's a multiple of the row size.
+  if (Offset)
+    CurrentRow = FixedOffset ? Builder.CreateAdd(CurrentRow, Offset) : Offset;
+
+  auto *CBufLoad = Builder.CreateIntrinsic(
+      Intrin.RetTy, Intrin.IID, {Handle, CurrentRow}, nullptr, Name + ".load");
+  auto *Elt =
+      Builder.CreateExtractValue(CBufLoad, {CurrentIndex++}, Name + ".extract");
+
+  // At this point we've loaded the first scalar of our result, but our original
+  // type may have been a vector.
+  unsigned int Remaining =
+      ((DL.getTypeSizeInBits(Ty) / 8) / Intrin.EltSize) - 1;
+  if (Remaining == 0) {
+    // We only have a single element, so we're done.
+    Value *Result = Elt;
+
+    // However, if we loaded a <1 x T>, then we need to adjust the type.
+    if (auto *VT = dyn_cast<FixedVectorType>(Ty)) {
+      assert(VT->getNumElements() == 1 && "Can't have multiple elements here");
+      Result = Builder.CreateInsertElement(PoisonValue::get(VT), Result,
+                                           Builder.getInt32(0), Name);
+    }
+    LI->replaceAllUsesWith(Result);
+    return;
+  }
+
+  // Walk each element and extract it, wrapping to new rows as needed.
+  SmallVector<Value *> Extracts{Elt};
+  while (Remaining--) {
+    CurrentIndex %= Intrin.NumElts;
+
+    if (CurrentIndex == 0) {
+      CurrentRow = Builder.CreateAdd(CurrentRow,
+                                     ConstantInt::get(Builder.getInt32Ty(), 1));
+      CBufLoad = Builder.CreateIntrinsic(Intrin.RetTy, Intrin.IID,
+                                         {Handle, CurrentRow}, nullptr,
+                                         Name + ".load");
+    }
+
+    Extracts.push_back(Builder.CreateExtractValue(CBufLoad, {CurrentIndex++},
+                                                  Name + ".extract"));
+  }
+
+  // Finally, we build up the original loaded value.
+  Value *Result = PoisonValue::get(Ty);
+  for (int I = 0, E = Extracts.size(); I < E; ++I)
+    Result = Builder.CreateInsertElement(
+        Result, Extracts[I], Builder.getInt32(I), Name + formatv(".upto{}", I));
+  LI->replaceAllUsesWith(Result);
+}
+
 static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset,
                                 dxil::ResourceTypeInfo &RTI) {
   switch (RTI.getResourceKind()) {
@@ -179,6 +314,8 @@ static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset,
   case dxil::ResourceKind::RawBuffer:
   case dxil::ResourceKind::StructuredBuffer:
     return createRawLoad(II, LI, Offset);
+  case dxil::ResourceKind::CBuffer:
+    return createCBufferLoad(II, LI, Offset, RTI);
   case dxil::ResourceKind::Texture1D:
   case dxil::ResourceKind::Texture2D:
   case dxil::ResourceKind::Texture2DMS:
@@ -190,9 +327,8 @@ static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset,
   case dxil::ResourceKind::TextureCubeArray:
   case dxil::ResourceKind::FeedbackTexture2D:
   case dxil::ResourceKind::FeedbackTexture2DArray:
-  case dxil::ResourceKind::CBuffer:
   case dxil::ResourceKind::TBuffer:
-    // TODO: handle these
+    reportFatalUsageError("Load not yet implemented for resource type");
     return;
   case dxil::ResourceKind::Sampler:
   case dxil::ResourceKind::RTAccelerationStructure:
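The arithmetic in createCBufferLoad above is easy to check in isolation: a constant byte offset is split into a 16-byte row (hlsl::CBufferRowSizeInBytes) and an element index within that row, and a multi-element load advances to the next row whenever the index wraps past the per-row element count. A standalone sketch of just that indexing math, with 16 standing in for the row size and no LLVM types involved:

    #include <iostream>
    #include <vector>

    struct RowAddr { unsigned Row, Index; };

    // Split a constant byte offset into (row, element-within-row), as the
    // FixedOffset / CBufferRowSizeInBytes computation in the pass does.
    RowAddr splitOffset(unsigned FixedOffset, unsigned EltSize) {
      return {FixedOffset / 16, (FixedOffset % 16) / EltSize};
    }

    // Plan NumLoaded element reads starting at FixedOffset, bumping the row
    // whenever the index wraps -- the same walk as the Extracts loop above.
    std::vector<RowAddr> planLoads(unsigned FixedOffset, unsigned EltSize,
                                   unsigned NumLoaded) {
      unsigned EltsPerRow = 16 / EltSize;
      RowAddr A = splitOffset(FixedOffset, EltSize);
      std::vector<RowAddr> Plan;
      for (unsigned I = 0; I != NumLoaded; ++I) {
        if (A.Index == EltsPerRow) { // wrap to the next 16-byte row
          ++A.Row;
          A.Index = 0;
        }
        Plan.push_back(A);
        ++A.Index;
      }
      return Plan;
    }

    int main() {
      // A float3 at byte offset 24: starts at row 1, element 2, and its last
      // component spills into row 2, element 0.
      for (RowAddr A : planLoads(24, 4, 3))
        std::cout << "row " << A.Row << " elt " << A.Index << "\n";
    }
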
diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
index 26a8728..48a9085 100644
--- a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
+++ b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
@@ -1169,8 +1169,8 @@ void DXILBitcodeWriter::writeModuleInfo() {
   // We need to hardcode a triple and datalayout that's compatible with the
   // historical DXIL triple and datalayout from DXC.
   StringRef Triple = "dxil-ms-dx";
-  StringRef DL = "e-m:e-p:32:32-i1:8-i8:8-i16:32-i32:32-i64:64-"
-                 "f16:32-f32:32-f64:64-n8:16:32:64";
+  StringRef DL = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-"
+                 "f16:16-f32:32-f64:64-n8:16:32:64";
   writeStringRecord(Stream, bitc::MODULE_CODE_TRIPLE, Triple, 0 /*TODO*/);
   writeStringRecord(Stream, bitc::MODULE_CODE_DATALAYOUT, DL, 0 /*TODO*/);
diff --git a/llvm/lib/Target/DirectX/DirectXInstrInfo.cpp b/llvm/lib/Target/DirectX/DirectXInstrInfo.cpp
index bb2efa4..401881d 100644
--- a/llvm/lib/Target/DirectX/DirectXInstrInfo.cpp
+++ b/llvm/lib/Target/DirectX/DirectXInstrInfo.cpp
@@ -19,6 +19,6 @@ using namespace llvm;

 DirectXInstrInfo::DirectXInstrInfo(const DirectXSubtarget &STI)
-    : DirectXGenInstrInfo(STI) {}
+    : DirectXGenInstrInfo(STI, RI) {}

 DirectXInstrInfo::~DirectXInstrInfo() {}
diff --git a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
index 68f5312..b378ce4 100644
--- a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -1886,7 +1886,7 @@ bool BitSimplification::matchHalf(unsigned SelfR,

 bool BitSimplification::validateReg(BitTracker::RegisterRef R, unsigned Opc,
                                     unsigned OpNum) {
-  auto *OpRC = HII.getRegClass(HII.get(Opc), OpNum, &HRI);
+  auto *OpRC = HII.getRegClass(HII.get(Opc), OpNum);
   auto *RRC = HBS::getFinalVRegClass(R, MRI);
   return OpRC->hasSubClassEq(RRC);
 }
diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
index dd343d9..df61226 100644
--- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -1405,7 +1405,7 @@ bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB,
     bool IsKill = !HRI.isEHReturnCalleeSaveReg(Reg);
     int FI = I.getFrameIdx();
     const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
-    HII.storeRegToStackSlot(MBB, MI, Reg, IsKill, FI, RC, &HRI, Register());
+    HII.storeRegToStackSlot(MBB, MI, Reg, IsKill, FI, RC, Register());
     if (IsKill)
       MBB.addLiveIn(Reg);
   }
@@ -1470,7 +1470,7 @@ bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB,
     MCRegister Reg = I.getReg();
     const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
     int FI = I.getFrameIdx();
-    HII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, &HRI, Register());
+    HII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, Register());
   }

   return true;
@@ -1814,8 +1813,7 @@ bool HexagonFrameLowering::expandStoreVecPred(MachineBasicBlock &B,
       .addReg(SrcR, getKillRegState(IsKill))
       .addReg(TmpR0, RegState::Kill);

-  auto *HRI = B.getParent()->getSubtarget<HexagonSubtarget>().getRegisterInfo();
-  HII.storeRegToStackSlot(B, It, TmpR1, true, FI, RC, HRI, Register());
+  HII.storeRegToStackSlot(B, It, TmpR1, true, FI, RC, Register());
   expandStoreVec(B, std::prev(It), MRI, HII, NewRegs);

   NewRegs.push_back(TmpR0);
@@ -1844,9 +1843,7 @@ bool HexagonFrameLowering::expandLoadVecPred(MachineBasicBlock &B,
   BuildMI(B, It, DL, HII.get(Hexagon::A2_tfrsi), TmpR0)
     .addImm(0x01010101);

-  MachineFunction &MF = *B.getParent();
-  auto *HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
-  HII.loadRegFromStackSlot(B, It, TmpR1, FI, RC, HRI, Register());
+  HII.loadRegFromStackSlot(B, It, TmpR1, FI, RC, Register());
   expandLoadVec(B, std::prev(It), MRI, HII, NewRegs);

   BuildMI(B, It, DL, HII.get(Hexagon::V6_vandvrt), DstR)
@@ -2225,7 +2222,7 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF,
         if (!Bad) {
           // If the addressing mode is ok, check the register class.
           unsigned OpNum = Load ? 0 : 2;
-          auto *RC = HII.getRegClass(In.getDesc(), OpNum, &HRI);
+          auto *RC = HII.getRegClass(In.getDesc(), OpNum);
           RC = getCommonRC(SI.RC, RC);
           if (RC == nullptr)
             Bad = true;
@@ -2395,7 +2392,7 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF,
         HexagonBlockRanges::RegisterRef SrcRR = { SrcOp.getReg(),
                                                   SrcOp.getSubReg() };
-        auto *RC = HII.getRegClass(SI.getDesc(), 2, &HRI);
+        auto *RC = HII.getRegClass(SI.getDesc(), 2);
         // The this-> is needed to unconfuse MSVC.
         Register FoundR = this->findPhysReg(MF, Range, IM, DM, RC);
         LLVM_DEBUG(dbgs() << "Replacement reg:" << printReg(FoundR, &HRI)
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 55bafde..7682af4 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -118,9 +118,9 @@ const int Hexagon_ADDI_OFFSET_MIN = -32768;
 void HexagonInstrInfo::anchor() {}

 HexagonInstrInfo::HexagonInstrInfo(const HexagonSubtarget &ST)
-    : HexagonGenInstrInfo(ST, Hexagon::ADJCALLSTACKDOWN,
+    : HexagonGenInstrInfo(ST, RegInfo, Hexagon::ADJCALLSTACKDOWN,
                           Hexagon::ADJCALLSTACKUP),
-      Subtarget(ST) {}
+      RegInfo(ST.getHwMode()), Subtarget(ST) {}

 namespace llvm {
 namespace HexagonFUnits {
@@ -964,7 +964,6 @@ void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator I,
                                            Register SrcReg, bool isKill, int FI,
                                            const TargetRegisterClass *RC,
-                                           const TargetRegisterInfo *TRI,
                                            Register VReg,
                                            MachineInstr::MIFlag Flags) const {
   DebugLoc DL = MBB.findDebugLoc(I);
@@ -1009,10 +1008,12 @@ void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
   }
 }

-void HexagonInstrInfo::loadRegFromStackSlot(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg,
-    int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
-    Register VReg, MachineInstr::MIFlag Flags) const {
+void HexagonInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                            MachineBasicBlock::iterator I,
+                                            Register DestReg, int FI,
+                                            const TargetRegisterClass *RC,
+                                            Register VReg,
+                                            MachineInstr::MIFlag Flags) const {
   DebugLoc DL = MBB.findDebugLoc(I);
   MachineFunction &MF = *MBB.getParent();
   MachineFrameInfo &MFI = MF.getFrameInfo();
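Worth noting in the Hexagon constructor above: the RegInfo member is handed to the generated base class before RegInfo itself is constructed. That is well-defined only because the base merely stores the reference during construction and does not read through it until after the full object exists. A standalone sketch of that ownership move and initialization order, with stand-in classes rather than the real LLVM ones:

    #include <iostream>

    struct RegInfo { unsigned HwMode; };

    class InstrInfoBase {
      const RegInfo &RI;

    public:
      // Only binds the reference; never dereferences it during construction.
      explicit InstrInfoBase(const RegInfo &RI) : RI(RI) {}
      const RegInfo &getRegisterInfo() const { return RI; }
    };

    class HexInstrInfo : public InstrInfoBase {
      const RegInfo Reg; // constructed after the base, which only kept &Reg

    public:
      explicit HexInstrInfo(unsigned HwMode) : InstrInfoBase(Reg), Reg{HwMode} {}
      const RegInfo &getRegisterInfo() const { return Reg; }
    };

    // The subtarget no longer owns the register info; it forwards to the
    // instruction info, matching the HexagonSubtarget::getRegisterInfo() hunk.
    struct HexSubtarget {
      HexInstrInfo II{1};
      const RegInfo *getRegisterInfo() const { return &II.getRegisterInfo(); }
    };

    int main() {
      HexSubtarget ST;
      std::cout << ST.getRegisterInfo()->HwMode << "\n"; // prints 1
    }
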
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
index 48adf82..796b978 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -23,6 +23,8 @@
 #include <cstdint>
 #include <vector>

+#include "HexagonRegisterInfo.h"
+
 #define GET_INSTRINFO_HEADER
 #include "HexagonGenInstrInfo.inc"

@@ -36,6 +38,7 @@ class MachineOperand;
 class TargetRegisterInfo;

 class HexagonInstrInfo : public HexagonGenInstrInfo {
+  const HexagonRegisterInfo RegInfo;
   const HexagonSubtarget &Subtarget;

   enum BundleAttribute {
@@ -47,6 +50,8 @@ class HexagonInstrInfo : public HexagonGenInstrInfo {
 public:
   explicit HexagonInstrInfo(const HexagonSubtarget &ST);

+  const HexagonRegisterInfo &getRegisterInfo() const { return RegInfo; }
+
   /// TargetInstrInfo overrides.

   /// If the specified machine instruction is a direct
@@ -183,8 +188,7 @@ public:
   /// is true, the register operand is the last use and must be marked kill.
   void storeRegToStackSlot(
       MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
-      bool isKill, int FrameIndex, const TargetRegisterClass *RC,
-      const TargetRegisterInfo *TRI, Register VReg,
+      bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg,
       MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;

   /// Load the specified register of the given register class from the specified
@@ -193,7 +197,7 @@ public:
   void loadRegFromStackSlot(
       MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg,
       int FrameIndex, const TargetRegisterClass *RC,
-      const TargetRegisterInfo *TRI, Register VReg,
+      Register VReg,
       MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;

   /// This function is called for all pseudo instructions
diff --git a/llvm/lib/Target/Hexagon/HexagonLoadStoreWidening.cpp b/llvm/lib/Target/Hexagon/HexagonLoadStoreWidening.cpp
index 7cbd81f..54969b2 100644
--- a/llvm/lib/Target/Hexagon/HexagonLoadStoreWidening.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonLoadStoreWidening.cpp
@@ -646,7 +646,7 @@ bool HexagonLoadStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG,
     MachineInstr *CombI;
     if (Acc != 0) {
       const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi);
-      const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI);
+      const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0);
       Register VReg = MF->getRegInfo().createVirtualRegister(RC);
       MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg).addImm(LowerAcc);
       NG.push_back(TfrI);
@@ -677,7 +677,7 @@ bool HexagonLoadStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG,
     } else {
       // Create vreg = A2_tfrsi #Acc; mem[hw] = vreg
       const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi);
-      const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI);
+      const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0);
       Register VReg = MF->getRegInfo().createVirtualRegister(RC);
       MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg).addImm(int(Acc));
       NG.push_back(TfrI);
diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
index a3c8a88..66c8b0a 100644
--- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -76,8 +76,7 @@ HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU,
       OptLevel(TM.getOptLevel()),
       CPUString(std::string(Hexagon_MC::selectHexagonCPU(CPU))),
       TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
-      RegInfo(getHwMode()), TLInfo(TM, *this),
-      InstrItins(getInstrItineraryForCPU(CPUString)) {
+      TLInfo(TM, *this), InstrItins(getInstrItineraryForCPU(CPUString)) {
   Hexagon_MC::addArchSubtarget(this, FS);
   // Beware of the default constructor of InstrItineraryData: it will
   // reset all members to 0.
diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/llvm/lib/Target/Hexagon/HexagonSubtarget.h
index 995f66d..7dfede2 100644
--- a/llvm/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.h
@@ -100,7 +100,6 @@ private:
   // The following objects can use the TargetTriple, so they must be
   // declared after it.
   HexagonInstrInfo InstrInfo;
-  HexagonRegisterInfo RegInfo;
   HexagonTargetLowering TLInfo;
   HexagonSelectionDAGInfo TSInfo;
   HexagonFrameLowering FrameLowering;
@@ -122,7 +121,7 @@ public:
   }
   const HexagonInstrInfo *getInstrInfo() const override { return &InstrInfo; }
   const HexagonRegisterInfo *getRegisterInfo() const override {
-    return &RegInfo;
+    return &InstrInfo.getRegisterInfo();
   }
   const HexagonTargetLowering *getTargetLowering() const override {
     return &TLInfo;
@@ -295,6 +294,8 @@ public:
   bool useBSBScheduling() const { return UseBSBScheduling; }
   bool enableMachineScheduler() const override;

+  bool enableTerminalRule() const override { return true; }
+
   // Always use the TargetLowering default scheduler.
   // FIXME: This will use the vliw scheduler which is probably just hurting
   // compiler time and will be removed eventually anyway.
diff --git a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index cb88d1a..d39b79a 100644
--- a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -653,7 +653,7 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr &MI,
   const MCInstrDesc& MCID = PacketMI.getDesc();

   // First operand is always the result.
-  const TargetRegisterClass *PacketRC = HII->getRegClass(MCID, 0, HRI);
+  const TargetRegisterClass *PacketRC = HII->getRegClass(MCID, 0);
   // Double regs can not feed into new value store: PRM section: 5.4.2.2.
   if (PacketRC == &Hexagon::DoubleRegsRegClass)
     return false;
@@ -866,7 +866,7 @@ bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr &MI,
     return false;

   const MCInstrDesc& MCID = PI.getDesc();
-  const TargetRegisterClass *VecRC = HII->getRegClass(MCID, 0, HRI);
+  const TargetRegisterClass *VecRC = HII->getRegClass(MCID, 0);
   if (DisableVecDblNVStores && VecRC == &Hexagon::HvxWRRegClass)
     return false;
diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
index 02ed1001..14b7557 100644
--- a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
+++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
@@ -27,7 +27,8 @@ using namespace llvm;
 #include "LanaiGenInstrInfo.inc"

 LanaiInstrInfo::LanaiInstrInfo(const LanaiSubtarget &STI)
-    : LanaiGenInstrInfo(STI, Lanai::ADJCALLSTACKDOWN, Lanai::ADJCALLSTACKUP),
+    : LanaiGenInstrInfo(STI, RegisterInfo, Lanai::ADJCALLSTACKDOWN,
+                        Lanai::ADJCALLSTACKUP),
       RegisterInfo() {}

 void LanaiInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
@@ -48,8 +49,7 @@ void LanaiInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
 void LanaiInstrInfo::storeRegToStackSlot(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator Position,
     Register SourceRegister, bool IsKill, int FrameIndex,
-    const TargetRegisterClass *RegisterClass,
-    const TargetRegisterInfo * /*RegisterInfo*/, Register /*VReg*/,
+    const TargetRegisterClass *RegisterClass, Register /*VReg*/,
     MachineInstr::MIFlag /*Flags*/) const {
   DebugLoc DL;
   if (Position != MBB.end()) {
@@ -69,8 +69,7 @@ void LanaiInstrInfo::loadRegFromStackSlot(
 void LanaiInstrInfo::loadRegFromStackSlot(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator Position,
     Register DestinationRegister, int FrameIndex,
-    const TargetRegisterClass *RegisterClass,
-    const TargetRegisterInfo * /*RegisterInfo*/, Register /*VReg*/,
+    const TargetRegisterClass *RegisterClass, Register /*VReg*/,
     MachineInstr::MIFlag /*Flags*/) const {
   DebugLoc DL;
   if (Position != MBB.end()) {
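The most pervasive call-site change in the Hexagon and Lanai hunks, getRegClass(MCID, OpNum, TRI) becoming getRegClass(MCID, OpNum), works because the instruction info can now resolve an operand's register class through the register info it owns. A toy model of that lookup (an illustrative table, not real TableGen output):

    #include <array>
    #include <cstdio>

    struct RegClass { const char *Name; };

    // Stands in for the register-class tables owned by TargetRegisterInfo.
    struct RegInfoTable {
      std::array<RegClass, 2> Classes{{{"IntRegs"}, {"DoubleRegs"}}};
      const RegClass *classByID(unsigned ID) const { return &Classes[ID]; }
    };

    class InstrInfo {
      const RegInfoTable &RI;

    public:
      explicit InstrInfo(const RegInfoTable &RI) : RI(RI) {}
      // One parameter fewer than before: callers no longer pass a TRI pointer.
      const RegClass *getRegClass(unsigned OpRegClassID) const {
        return RI.classByID(OpRegClassID);
      }
    };

    int main() {
      RegInfoTable RI;
      InstrInfo TII(RI);
      std::printf("%s\n", TII.getRegClass(1)->Name); // DoubleRegs
    }
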
diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.h b/llvm/lib/Target/Lanai/LanaiInstrInfo.h
index d9827624..155e2f0 100644
--- a/llvm/lib/Target/Lanai/LanaiInstrInfo.h
+++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.h
@@ -58,15 +58,13 @@ public:
   void storeRegToStackSlot(
       MachineBasicBlock &MBB, MachineBasicBlock::iterator Position,
       Register SourceRegister, bool IsKill, int FrameIndex,
-      const TargetRegisterClass *RegisterClass,
-      const TargetRegisterInfo *RegisterInfo, Register VReg,
+      const TargetRegisterClass *RegisterClass, Register VReg,
       MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;

   void loadRegFromStackSlot(
       MachineBasicBlock &MBB, MachineBasicBlock::iterator Position,
       Register DestinationRegister, int FrameIndex,
-      const TargetRegisterClass *RegisterClass,
-      const TargetRegisterInfo *RegisterInfo, Register VReg,
+      const TargetRegisterClass *RegisterClass, Register VReg,
       MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;

   bool expandPostRAPseudo(MachineInstr &MI) const override;
diff --git a/llvm/lib/Target/LoongArch/LoongArchDeadRegisterDefinitions.cpp b/llvm/lib/Target/LoongArch/LoongArchDeadRegisterDefinitions.cpp
index 0ccebeb3..6358e348 100644
--- a/llvm/lib/Target/LoongArch/LoongArchDeadRegisterDefinitions.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchDeadRegisterDefinitions.cpp
@@ -60,7 +60,6 @@ bool LoongArchDeadRegisterDefinitions::runOnMachineFunction(
     return false;

   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
-  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
   LiveIntervals &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
   LLVM_DEBUG(dbgs() << "***** LoongArchDeadRegisterDefinitions *****\n");

@@ -86,7 +85,7 @@ bool LoongArchDeadRegisterDefinitions::runOnMachineFunction(
         continue;
       LLVM_DEBUG(dbgs() << "    Dead def operand #" << I << " in:\n      ";
                  MI.print(dbgs()));
-      const TargetRegisterClass *RC = TII->getRegClass(Desc, I, TRI);
+      const TargetRegisterClass *RC = TII->getRegClass(Desc, I);
       if (!(RC && RC->contains(LoongArch::R0))) {
         LLVM_DEBUG(dbgs() << "    Ignoring, register is not a GPR.\n");
         continue;
diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
index 1493bf4..690b063 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
@@ -449,7 +449,7 @@ bool LoongArchFrameLowering::spillCalleeSavedRegisters(
     bool IsKill =
         !(Reg == LoongArch::R1 && MF->getFrameInfo().isReturnAddressTaken());
     const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-    TII.storeRegToStackSlot(MBB, MI, Reg, IsKill, CS.getFrameIdx(), RC, TRI,
+    TII.storeRegToStackSlot(MBB, MI, Reg, IsKill, CS.getFrameIdx(), RC,
                             Register());
   }

diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
index 9a35df2..9fc862a 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
@@ -26,9 +26,9 @@ using namespace llvm;
 #include "LoongArchGenInstrInfo.inc"

 LoongArchInstrInfo::LoongArchInstrInfo(const LoongArchSubtarget &STI)
-    : LoongArchGenInstrInfo(STI, LoongArch::ADJCALLSTACKDOWN,
+    : LoongArchGenInstrInfo(STI, RegInfo, LoongArch::ADJCALLSTACKDOWN,
                             LoongArch::ADJCALLSTACKUP),
-      STI(STI) {}
+      RegInfo(STI.getHwMode()), STI(STI) {}

 MCInst LoongArchInstrInfo::getNop() const {
   return MCInstBuilder(LoongArch::ANDI)
@@ -113,14 +113,14 @@ void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB,

 void LoongArchInstrInfo::storeRegToStackSlot(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register SrcReg,
     bool IsKill, int FI, const TargetRegisterClass *RC,
-    const TargetRegisterInfo *TRI, Register VReg,
-    MachineInstr::MIFlag Flags) const {
+
+    Register VReg, MachineInstr::MIFlag Flags) const {
   MachineFunction *MF = MBB.getParent();
   MachineFrameInfo &MFI = MF->getFrameInfo();

   unsigned Opcode;
   if (LoongArch::GPRRegClass.hasSubClassEq(RC))
-    Opcode = TRI->getRegSizeInBits(LoongArch::GPRRegClass) == 32
+    Opcode = TRI.getRegSizeInBits(LoongArch::GPRRegClass) == 32
                  ? LoongArch::ST_W
                  : LoongArch::ST_D;
   else if (LoongArch::FPR32RegClass.hasSubClassEq(RC))
@@ -149,8 +149,8 @@ void LoongArchInstrInfo::storeRegToStackSlot(

 void LoongArchInstrInfo::loadRegFromStackSlot(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DstReg,
-    int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
-    Register VReg, MachineInstr::MIFlag Flags) const {
+    int FI, const TargetRegisterClass *RC, Register VReg,
+    MachineInstr::MIFlag Flags) const {
   MachineFunction *MF = MBB.getParent();
   MachineFrameInfo &MFI = MF->getFrameInfo();
   DebugLoc DL;
@@ -159,7 +159,7 @@ void LoongArchInstrInfo::loadRegFromStackSlot(

   unsigned Opcode;
   if (LoongArch::GPRRegClass.hasSubClassEq(RC))
-    Opcode = TRI->getRegSizeInBits(LoongArch::GPRRegClass) == 32
+    Opcode = RegInfo.getRegSizeInBits(LoongArch::GPRRegClass) == 32
                  ? LoongArch::LD_W
                  : LoongArch::LD_D;
   else if (LoongArch::FPR32RegClass.hasSubClassEq(RC))
@@ -665,13 +665,13 @@ void LoongArchInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
     if (FrameIndex == -1)
       report_fatal_error("The function size is incorrectly estimated.");
     storeRegToStackSlot(MBB, PCALAU12I, Scav, /*IsKill=*/true, FrameIndex,
-                        &LoongArch::GPRRegClass, TRI, Register());
+                        &LoongArch::GPRRegClass, Register());
     TRI->eliminateFrameIndex(std::prev(PCALAU12I.getIterator()),
                              /*SpAdj=*/0, /*FIOperandNum=*/1);
     PCALAU12I.getOperand(1).setMBB(&RestoreBB);
     ADDI.getOperand(2).setMBB(&RestoreBB);
     loadRegFromStackSlot(RestoreBB, RestoreBB.end(), Scav, FrameIndex,
-                         &LoongArch::GPRRegClass, TRI, Register());
+                         &LoongArch::GPRRegClass, Register());
     TRI->eliminateFrameIndex(RestoreBB.back(),
                              /*SpAdj=*/0, /*FIOperandNum=*/1);
   }
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
index e61314c..9f7a0a2 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
@@ -24,9 +24,13 @@ namespace llvm {
 class LoongArchSubtarget;

 class LoongArchInstrInfo : public LoongArchGenInstrInfo {
+  const LoongArchRegisterInfo RegInfo;
+
 public:
   explicit LoongArchInstrInfo(const LoongArchSubtarget &STI);

+  const LoongArchRegisterInfo &getRegisterInfo() const { return RegInfo; }
+
   MCInst getNop() const override;

   void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
@@ -36,13 +40,11 @@ public:

   void storeRegToStackSlot(
       MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
-      bool IsKill, int FrameIndex, const TargetRegisterClass *RC,
-      const TargetRegisterInfo *TRI, Register VReg,
+      bool IsKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg,
       MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
   void loadRegFromStackSlot(
       MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DstReg,
-      int FrameIndex, const TargetRegisterClass *RC,
-      const TargetRegisterInfo *TRI, Register VReg,
+      int FrameIndex, const TargetRegisterClass *RC, Register VReg,
       MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;

   // Materializes the given integer Val into DstReg.
diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp b/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp
index 3acbe49..76a8ba1 100644
--- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp
@@ -95,4 +95,4 @@ LoongArchSubtarget::LoongArchSubtarget(const Triple &TT, StringRef CPU,
     : LoongArchGenSubtargetInfo(TT, CPU, TuneCPU, FS),
       FrameLowering(
           initializeSubtargetDependencies(TT, CPU, TuneCPU, FS, ABIName)),
-      InstrInfo(*this), RegInfo(getHwMode()), TLInfo(TM, *this) {}
+      InstrInfo(*this), TLInfo(TM, *this) {}
diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
index 5e12baf..2beff07 100644
--- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
+++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
@@ -45,7 +45,6 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo {
   LoongArchABI::ABI TargetABI = LoongArchABI::ABI_Unknown;
   LoongArchFrameLowering FrameLowering;
   LoongArchInstrInfo InstrInfo;
-  LoongArchRegisterInfo RegInfo;
   LoongArchTargetLowering TLInfo;
   SelectionDAGTargetInfo TSInfo;

@@ -78,7 +77,7 @@ public:
   }
   const LoongArchInstrInfo *getInstrInfo() const override { return &InstrInfo; }
   const LoongArchRegisterInfo *getRegisterInfo() const override {
-    return &RegInfo;
+    return &InstrInfo.getRegisterInfo();
   }
   const LoongArchTargetLowering *getTargetLowering() const override {
     return &TLInfo;
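In the LoongArch hunks above, the spill opcode for a GPR depends only on the subtarget's register width, now read from the member register info: ST_W/LD_W when GRLen is 32 bits, ST_D/LD_D when it is 64. A tiny self-contained sketch of that choice (the enumerators echo the opcode names; the struct is a stand-in, not the real API):

    #include <cstdio>

    enum class Opc { ST_W, ST_D };

    struct RegInfo { unsigned GRLenBits; };

    // Mirrors the getRegSizeInBits() == 32 test in storeRegToStackSlot.
    Opc selectGPRStore(const RegInfo &RI) {
      return RI.GRLenBits == 32 ? Opc::ST_W : Opc::ST_D;
    }

    int main() {
      std::printf("%d\n", selectGPRStore({64}) == Opc::ST_D); // prints 1
    }
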
unsigned Opc = getLoadRegOpcode(DstReg, RC, &TRI, Subtarget); DebugLoc DL = MBB.findDebugLoc(MI); M68k::addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DstReg), FrameIndex); } diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.h b/llvm/lib/Target/M68k/M68kInstrInfo.h index 97615d6..2b3789d 100644 --- a/llvm/lib/Target/M68k/M68kInstrInfo.h +++ b/llvm/lib/Target/M68k/M68kInstrInfo.h @@ -280,14 +280,12 @@ public: void storeRegToStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, - bool IsKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, + bool IsKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; void loadRegFromStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, - int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, + int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; bool expandPostRAPseudo(MachineInstr &MI) const override; diff --git a/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp b/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp index 65b4820..0fb4e9d 100644 --- a/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -26,13 +26,13 @@ using namespace llvm; void MSP430InstrInfo::anchor() {} MSP430InstrInfo::MSP430InstrInfo(const MSP430Subtarget &STI) - : MSP430GenInstrInfo(STI, MSP430::ADJCALLSTACKDOWN, MSP430::ADJCALLSTACKUP), + : MSP430GenInstrInfo(STI, RI, MSP430::ADJCALLSTACKDOWN, + MSP430::ADJCALLSTACKUP), RI() {} void MSP430InstrInfo::storeRegToStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, - bool isKill, int FrameIdx, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, + bool isKill, int FrameIdx, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags) const { DebugLoc DL; if (MI != MBB.end()) DL = MI->getDebugLoc(); @@ -56,10 +56,12 @@ void MSP430InstrInfo::storeRegToStackSlot( llvm_unreachable("Cannot store this register to stack slot!"); } -void MSP430InstrInfo::loadRegFromStackSlot( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, - int FrameIdx, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, - Register VReg, MachineInstr::MIFlag Flags) const { +void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + Register DestReg, int FrameIdx, + const TargetRegisterClass *RC, + Register VReg, + MachineInstr::MIFlag Flags) const { DebugLoc DL; if (MI != MBB.end()) DL = MI->getDebugLoc(); MachineFunction &MF = *MBB.getParent(); diff --git a/llvm/lib/Target/MSP430/MSP430InstrInfo.h b/llvm/lib/Target/MSP430/MSP430InstrInfo.h index 316c136..c0a3984 100644 --- a/llvm/lib/Target/MSP430/MSP430InstrInfo.h +++ b/llvm/lib/Target/MSP430/MSP430InstrInfo.h @@ -42,13 +42,11 @@ public: void storeRegToStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, - bool isKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, + bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; void loadRegFromStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, - int FrameIdx, const TargetRegisterClass *RC, - const 
TargetRegisterInfo *TRI, Register VReg, + int FrameIdx, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; unsigned getInstSizeInBytes(const MachineInstr &MI) const override; diff --git a/llvm/lib/Target/Mips/Mips16InstrInfo.cpp b/llvm/lib/Target/Mips/Mips16InstrInfo.cpp index aa94f54..d23ec57 100644 --- a/llvm/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/llvm/lib/Target/Mips/Mips16InstrInfo.cpp @@ -37,11 +37,7 @@ using namespace llvm; #define DEBUG_TYPE "mips16-instrinfo" Mips16InstrInfo::Mips16InstrInfo(const MipsSubtarget &STI) - : MipsInstrInfo(STI, Mips::Bimm16), RI(STI) {} - -const MipsRegisterInfo &Mips16InstrInfo::getRegisterInfo() const { - return RI; -} + : MipsInstrInfo(STI, RI, Mips::Bimm16), RI(STI) {} /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of @@ -105,7 +101,6 @@ void Mips16InstrInfo::storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register SrcReg, bool isKill, int FI, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, int64_t Offset, MachineInstr::MIFlag Flags) const { DebugLoc DL; @@ -120,10 +115,12 @@ void Mips16InstrInfo::storeRegToStack(MachineBasicBlock &MBB, .addMemOperand(MMO); } -void Mips16InstrInfo::loadRegFromStack( - MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg, - int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, - int64_t Offset, MachineInstr::MIFlag Flags) const { +void Mips16InstrInfo::loadRegFromStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + Register DestReg, int FI, + const TargetRegisterClass *RC, + int64_t Offset, + MachineInstr::MIFlag Flags) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad); diff --git a/llvm/lib/Target/Mips/Mips16InstrInfo.h b/llvm/lib/Target/Mips/Mips16InstrInfo.h index 1058e8c..4300d08 100644 --- a/llvm/lib/Target/Mips/Mips16InstrInfo.h +++ b/llvm/lib/Target/Mips/Mips16InstrInfo.h @@ -30,7 +30,7 @@ class Mips16InstrInfo : public MipsInstrInfo { public: explicit Mips16InstrInfo(const MipsSubtarget &STI); - const MipsRegisterInfo &getRegisterInfo() const override; + const Mips16RegisterInfo &getRegisterInfo() const { return RI; } /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of @@ -56,13 +56,14 @@ public: void storeRegToStack( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, int64_t Offset, + int64_t Offset, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; void loadRegFromStack( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, int64_t Offset, + + int64_t Offset, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; bool expandPostRAPseudo(MachineInstr &MI) const override; diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.cpp b/llvm/lib/Target/Mips/MipsInstrInfo.cpp index bffdffa..c879c46 100644 --- a/llvm/lib/Target/Mips/MipsInstrInfo.cpp +++ b/llvm/lib/Target/Mips/MipsInstrInfo.cpp @@ -39,8 +39,9 @@ using namespace llvm; // Pin the vtable to this file. 
void MipsInstrInfo::anchor() {} -MipsInstrInfo::MipsInstrInfo(const MipsSubtarget &STI, unsigned UncondBr) - : MipsGenInstrInfo(STI, Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP), +MipsInstrInfo::MipsInstrInfo(const MipsSubtarget &STI, + const MipsRegisterInfo &RI, unsigned UncondBr) + : MipsGenInstrInfo(STI, RI, Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP), Subtarget(STI), UncondBrOpc(UncondBr) {} const MipsInstrInfo *MipsInstrInfo::create(MipsSubtarget &STI) { diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.h b/llvm/lib/Target/Mips/MipsInstrInfo.h index 2337ae7..0b90972 100644 --- a/llvm/lib/Target/Mips/MipsInstrInfo.h +++ b/llvm/lib/Target/Mips/MipsInstrInfo.h @@ -55,7 +55,8 @@ public: BT_Indirect // One indirect branch. }; - explicit MipsInstrInfo(const MipsSubtarget &STI, unsigned UncondBrOpc); + explicit MipsInstrInfo(const MipsSubtarget &STI, const MipsRegisterInfo &RI, + unsigned UncondBrOpc); MCInst getNop() const override; @@ -130,7 +131,10 @@ public: /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As /// such, whenever a client has an instance of instruction info, it should /// always be able to get register info as well (through this method). - virtual const MipsRegisterInfo &getRegisterInfo() const = 0; + const MipsRegisterInfo &getRegisterInfo() const { + return static_cast<const MipsRegisterInfo &>( + TargetInstrInfo::getRegisterInfo()); + } virtual unsigned getOppositeBranchOpc(unsigned Opc) const = 0; @@ -143,31 +147,28 @@ public: void storeRegToStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, - bool isKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, + bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override { - storeRegToStack(MBB, MBBI, SrcReg, isKill, FrameIndex, RC, TRI, 0, Flags); + storeRegToStack(MBB, MBBI, SrcReg, isKill, FrameIndex, RC, 0, Flags); } void loadRegFromStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, + Register VReg, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override { - loadRegFromStack(MBB, MBBI, DestReg, FrameIndex, RC, TRI, 0, Flags); + loadRegFromStack(MBB, MBBI, DestReg, FrameIndex, RC, 0, Flags); } virtual void storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, - int64_t Offset, + const TargetRegisterClass *RC, int64_t Offset, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const = 0; virtual void loadRegFromStack( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, - int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, int64_t Offset, + int FrameIndex, const TargetRegisterClass *RC, int64_t Offset, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const = 0; virtual void adjustStackPtr(unsigned SP, int64_t Amount, diff --git a/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp b/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp index f08704a..942194c 100644 --- a/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -172,7 +172,7 @@ void ExpandPseudo::expandLoadCCond(MachineBasicBlock &MBB, Iter I) { Register VR = MRI.createVirtualRegister(RC); Register Dst = I->getOperand(0).getReg(), FI 
= I->getOperand(1).getIndex(); - TII.loadRegFromStack(MBB, I, VR, FI, RC, &RegInfo, 0); + TII.loadRegFromStack(MBB, I, VR, FI, RC, 0); BuildMI(MBB, I, I->getDebugLoc(), TII.get(TargetOpcode::COPY), Dst) .addReg(VR, RegState::Kill); } @@ -189,7 +189,7 @@ void ExpandPseudo::expandStoreCCond(MachineBasicBlock &MBB, Iter I) { BuildMI(MBB, I, I->getDebugLoc(), TII.get(TargetOpcode::COPY), VR) .addReg(Src, getKillRegState(I->getOperand(0).isKill())); - TII.storeRegToStack(MBB, I, VR, true, FI, RC, &RegInfo, 0); + TII.storeRegToStack(MBB, I, VR, true, FI, RC, 0); } void ExpandPseudo::expandLoadACC(MachineBasicBlock &MBB, Iter I, @@ -210,9 +210,9 @@ void ExpandPseudo::expandLoadACC(MachineBasicBlock &MBB, Iter I, DebugLoc DL = I->getDebugLoc(); const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY); - TII.loadRegFromStack(MBB, I, VR0, FI, RC, &RegInfo, 0); + TII.loadRegFromStack(MBB, I, VR0, FI, RC, 0); BuildMI(MBB, I, DL, Desc, Lo).addReg(VR0, RegState::Kill); - TII.loadRegFromStack(MBB, I, VR1, FI, RC, &RegInfo, RegSize); + TII.loadRegFromStack(MBB, I, VR1, FI, RC, RegSize); BuildMI(MBB, I, DL, Desc, Hi).addReg(VR1, RegState::Kill); } @@ -234,9 +234,9 @@ void ExpandPseudo::expandStoreACC(MachineBasicBlock &MBB, Iter I, DebugLoc DL = I->getDebugLoc(); BuildMI(MBB, I, DL, TII.get(MFLoOpc), VR0).addReg(Src); - TII.storeRegToStack(MBB, I, VR0, true, FI, RC, &RegInfo, 0); + TII.storeRegToStack(MBB, I, VR0, true, FI, RC, 0); BuildMI(MBB, I, DL, TII.get(MFHiOpc), VR1).addReg(Src, SrcKill); - TII.storeRegToStack(MBB, I, VR1, true, FI, RC, &RegInfo, RegSize); + TII.storeRegToStack(MBB, I, VR1, true, FI, RC, RegSize); } bool ExpandPseudo::expandCopy(MachineBasicBlock &MBB, Iter I) { @@ -321,11 +321,9 @@ bool ExpandPseudo::expandBuildPairF64(MachineBasicBlock &MBB, int FI = MF.getInfo<MipsFunctionInfo>()->getMoveF64ViaSpillFI(MF, RC2); if (!Subtarget.isLittle()) std::swap(LoReg, HiReg); - TII.storeRegToStack(MBB, I, LoReg, I->getOperand(1).isKill(), FI, RC, - &RegInfo, 0); - TII.storeRegToStack(MBB, I, HiReg, I->getOperand(2).isKill(), FI, RC, - &RegInfo, 4); - TII.loadRegFromStack(MBB, I, DstReg, FI, RC2, &RegInfo, 0); + TII.storeRegToStack(MBB, I, LoReg, I->getOperand(1).isKill(), FI, RC, 0); + TII.storeRegToStack(MBB, I, HiReg, I->getOperand(2).isKill(), FI, RC, 4); + TII.loadRegFromStack(MBB, I, DstReg, FI, RC2, 0); return true; } @@ -385,8 +383,8 @@ bool ExpandPseudo::expandExtractElementF64(MachineBasicBlock &MBB, // We re-use the same spill slot each time so that the stack frame doesn't // grow too much in functions with a large number of moves. int FI = MF.getInfo<MipsFunctionInfo>()->getMoveF64ViaSpillFI(MF, RC); - TII.storeRegToStack(MBB, I, SrcReg, Op1.isKill(), FI, RC, &RegInfo, 0); - TII.loadRegFromStack(MBB, I, DstReg, FI, RC2, &RegInfo, Offset); + TII.storeRegToStack(MBB, I, SrcReg, Op1.isKill(), FI, RC, 0); + TII.loadRegFromStack(MBB, I, DstReg, FI, RC2, Offset); return true; } @@ -480,8 +478,7 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF, if (!MBB.isLiveIn(ABI.GetEhDataReg(I))) MBB.addLiveIn(ABI.GetEhDataReg(I)); TII.storeRegToStackSlot(MBB, MBBI, ABI.GetEhDataReg(I), false, - MipsFI->getEhDataRegFI(I), RC, &RegInfo, - Register()); + MipsFI->getEhDataRegFI(I), RC, Register()); } // Emit .cfi_offset directives for eh data registers. 
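All of the constructor changes in this commit repeat one C++ pattern worth spelling out: each target's InstrInfo passes a reference to its own RegisterInfo member up to the generated base class before that member has been constructed, the base only binds the reference, and the target then shadows the base accessor with a more precisely typed one (as the Mips header above does via static_cast). A minimal standalone sketch, with invented stand-in names for the LLVM classes:

    #include <cassert>

    struct RegisterInfoBase {
      virtual ~RegisterInfoBase() = default;
    };

    class InstrInfoBase {
      const RegisterInfoBase &TRI; // only *bound* here, never read in the ctor
    public:
      explicit InstrInfoBase(const RegisterInfoBase &TRI) : TRI(TRI) {}
      const RegisterInfoBase &getRegisterInfo() const { return TRI; }
    };

    struct MipsLikeRegisterInfo : RegisterInfoBase {};

    class MipsLikeInstrInfo : public InstrInfoBase {
      MipsLikeRegisterInfo RI; // constructed *after* the base, which is fine
    public:
      // Passing RI to the base before RI's constructor has run is well defined
      // because the base constructor stores the reference without using it.
      MipsLikeInstrInfo() : InstrInfoBase(RI) {}
      // Shadow the base accessor with a narrower return type.
      const MipsLikeRegisterInfo &getRegisterInfo() const { return RI; }
    };

    int main() {
      MipsLikeInstrInfo II;
      const InstrInfoBase &Base = II;
      // Both views name the same object, so the subtarget no longer needs its
      // own RegisterInfo field in order to hand one out.
      assert(&Base.getRegisterInfo() == &II.getRegisterInfo());
      return 0;
    }

This is also why the subtarget hunks (LoongArch, RISC-V) can drop their RegInfo members and forward getRegisterInfo() to the InstrInfo instead.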
@@ -579,8 +576,7 @@ void MipsSEFrameLowering::emitInterruptPrologueStub( .setMIFlag(MachineInstr::FrameSetup); STI.getInstrInfo()->storeRegToStack(MBB, MBBI, Mips::K1, false, - MipsFI->getISRRegFI(0), PtrRC, - STI.getRegisterInfo(), 0); + MipsFI->getISRRegFI(0), PtrRC, 0); // Fetch and Spill Status MBB.addLiveIn(Mips::COP012); @@ -590,8 +586,7 @@ void MipsSEFrameLowering::emitInterruptPrologueStub( .setMIFlag(MachineInstr::FrameSetup); STI.getInstrInfo()->storeRegToStack(MBB, MBBI, Mips::K1, false, - MipsFI->getISRRegFI(1), PtrRC, - STI.getRegisterInfo(), 0); + MipsFI->getISRRegFI(1), PtrRC, 0); // Build the configuration for disabling lower priority interrupts. Non EIC // interrupts need to be masked off with zero, EIC from the Cause register. @@ -657,7 +652,6 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF, const MipsSEInstrInfo &TII = *static_cast<const MipsSEInstrInfo *>(STI.getInstrInfo()); - const MipsRegisterInfo &RegInfo = *STI.getRegisterInfo(); DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); MipsABIInfo ABI = STI.getABI(); @@ -690,8 +684,7 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF, // Insert instructions that restore eh data registers. for (int J = 0; J < 4; ++J) { TII.loadRegFromStackSlot(MBB, I, ABI.GetEhDataReg(J), - MipsFI->getEhDataRegFI(J), RC, &RegInfo, - Register()); + MipsFI->getEhDataRegFI(J), RC, Register()); } } @@ -722,17 +715,15 @@ void MipsSEFrameLowering::emitInterruptEpilogueStub( BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::EHB)); // Restore EPC - STI.getInstrInfo()->loadRegFromStackSlot(MBB, MBBI, Mips::K1, - MipsFI->getISRRegFI(0), PtrRC, - STI.getRegisterInfo(), Register()); + STI.getInstrInfo()->loadRegFromStackSlot( + MBB, MBBI, Mips::K1, MipsFI->getISRRegFI(0), PtrRC, Register()); BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::MTC0), Mips::COP014) .addReg(Mips::K1) .addImm(0); // Restore Status - STI.getInstrInfo()->loadRegFromStackSlot(MBB, MBBI, Mips::K1, - MipsFI->getISRRegFI(1), PtrRC, - STI.getRegisterInfo(), Register()); + STI.getInstrInfo()->loadRegFromStackSlot( + MBB, MBBI, Mips::K1, MipsFI->getISRRegFI(1), PtrRC, Register()); BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::MTC0), Mips::COP012) .addReg(Mips::K1) .addImm(0); @@ -795,7 +786,7 @@ bool MipsSEFrameLowering::spillCalleeSavedRegisters( // Insert the spill to the stack frame. 
bool IsKill = !IsRAAndRetAddrIsTaken; const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(MBB, MI, Reg, IsKill, I.getFrameIdx(), RC, TRI, + TII.storeRegToStackSlot(MBB, MI, Reg, IsKill, I.getFrameIdx(), RC, Register()); } diff --git a/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp b/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp index dbdbb17..a1d0aa0 100644 --- a/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -28,11 +28,7 @@ static unsigned getUnconditionalBranch(const MipsSubtarget &STI) { } MipsSEInstrInfo::MipsSEInstrInfo(const MipsSubtarget &STI) - : MipsInstrInfo(STI, getUnconditionalBranch(STI)), RI(STI) {} - -const MipsRegisterInfo &MipsSEInstrInfo::getRegisterInfo() const { - return RI; -} + : MipsInstrInfo(STI, RI, getUnconditionalBranch(STI)), RI(STI) {} /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of @@ -213,7 +209,6 @@ void MipsSEInstrInfo::storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register SrcReg, bool isKill, int FI, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, int64_t Offset, MachineInstr::MIFlag Flags) const { DebugLoc DL; @@ -239,16 +234,16 @@ void MipsSEInstrInfo::storeRegToStack(MachineBasicBlock &MBB, Opc = Mips::SDC1; else if (Mips::FGR64RegClass.hasSubClassEq(RC)) Opc = Mips::SDC164; - else if (TRI->isTypeLegalForClass(*RC, MVT::v16i8)) + else if (RI.isTypeLegalForClass(*RC, MVT::v16i8)) Opc = Mips::ST_B; - else if (TRI->isTypeLegalForClass(*RC, MVT::v8i16) || - TRI->isTypeLegalForClass(*RC, MVT::v8f16)) + else if (RI.isTypeLegalForClass(*RC, MVT::v8i16) || + RI.isTypeLegalForClass(*RC, MVT::v8f16)) Opc = Mips::ST_H; - else if (TRI->isTypeLegalForClass(*RC, MVT::v4i32) || - TRI->isTypeLegalForClass(*RC, MVT::v4f32)) + else if (RI.isTypeLegalForClass(*RC, MVT::v4i32) || + RI.isTypeLegalForClass(*RC, MVT::v4f32)) Opc = Mips::ST_W; - else if (TRI->isTypeLegalForClass(*RC, MVT::v2i64) || - TRI->isTypeLegalForClass(*RC, MVT::v2f64)) + else if (RI.isTypeLegalForClass(*RC, MVT::v2i64) || + RI.isTypeLegalForClass(*RC, MVT::v2f64)) Opc = Mips::ST_D; else if (Mips::LO32RegClass.hasSubClassEq(RC)) Opc = Mips::SW; @@ -285,10 +280,12 @@ void MipsSEInstrInfo::storeRegToStack(MachineBasicBlock &MBB, .addFrameIndex(FI).addImm(Offset).addMemOperand(MMO); } -void MipsSEInstrInfo::loadRegFromStack( - MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg, - int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, - int64_t Offset, MachineInstr::MIFlag Flags) const { +void MipsSEInstrInfo::loadRegFromStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + Register DestReg, int FI, + const TargetRegisterClass *RC, + int64_t Offset, + MachineInstr::MIFlag Flags) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad); @@ -317,16 +314,16 @@ void MipsSEInstrInfo::loadRegFromStack( Opc = Mips::LDC1; else if (Mips::FGR64RegClass.hasSubClassEq(RC)) Opc = Mips::LDC164; - else if (TRI->isTypeLegalForClass(*RC, MVT::v16i8)) + else if (RI.isTypeLegalForClass(*RC, MVT::v16i8)) Opc = Mips::LD_B; - else if (TRI->isTypeLegalForClass(*RC, MVT::v8i16) || - TRI->isTypeLegalForClass(*RC, MVT::v8f16)) + else if (RI.isTypeLegalForClass(*RC, MVT::v8i16) || + RI.isTypeLegalForClass(*RC, MVT::v8f16)) Opc = Mips::LD_H; - else if (TRI->isTypeLegalForClass(*RC, MVT::v4i32) 
|| - TRI->isTypeLegalForClass(*RC, MVT::v4f32)) + else if (RI.isTypeLegalForClass(*RC, MVT::v4i32) || + RI.isTypeLegalForClass(*RC, MVT::v4f32)) Opc = Mips::LD_W; - else if (TRI->isTypeLegalForClass(*RC, MVT::v2i64) || - TRI->isTypeLegalForClass(*RC, MVT::v2f64)) + else if (RI.isTypeLegalForClass(*RC, MVT::v2i64) || + RI.isTypeLegalForClass(*RC, MVT::v2f64)) Opc = Mips::LD_D; else if (Mips::HI32RegClass.hasSubClassEq(RC)) Opc = Mips::LW; @@ -682,8 +679,8 @@ MipsSEInstrInfo::compareOpndSize(unsigned Opc, const MCInstrDesc &Desc = get(Opc); assert(Desc.NumOperands == 2 && "Unary instruction expected."); const MipsRegisterInfo *RI = &getRegisterInfo(); - unsigned DstRegSize = RI->getRegSizeInBits(*getRegClass(Desc, 0, RI)); - unsigned SrcRegSize = RI->getRegSizeInBits(*getRegClass(Desc, 1, RI)); + unsigned DstRegSize = RI->getRegSizeInBits(*getRegClass(Desc, 0)); + unsigned SrcRegSize = RI->getRegSizeInBits(*getRegClass(Desc, 1)); return std::make_pair(DstRegSize > SrcRegSize, DstRegSize < SrcRegSize); } diff --git a/llvm/lib/Target/Mips/MipsSEInstrInfo.h b/llvm/lib/Target/Mips/MipsSEInstrInfo.h index 2b4f55d..5c48ccd 100644 --- a/llvm/lib/Target/Mips/MipsSEInstrInfo.h +++ b/llvm/lib/Target/Mips/MipsSEInstrInfo.h @@ -24,7 +24,7 @@ class MipsSEInstrInfo : public MipsInstrInfo { public: explicit MipsSEInstrInfo(const MipsSubtarget &STI); - const MipsRegisterInfo &getRegisterInfo() const override; + const MipsSERegisterInfo &getRegisterInfo() const { return RI; } /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of @@ -50,13 +50,12 @@ public: void storeRegToStack( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, int64_t Offset, + int64_t Offset, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; void loadRegFromStack( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, - int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, int64_t Offset, + int FrameIndex, const TargetRegisterClass *RC, int64_t Offset, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; bool expandPostRAPseudo(MachineInstr &MI) const override; diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp index 6840c7a..db2d96f 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -26,7 +26,7 @@ using namespace llvm; void NVPTXInstrInfo::anchor() {} NVPTXInstrInfo::NVPTXInstrInfo(const NVPTXSubtarget &STI) - : NVPTXGenInstrInfo(STI), RegInfo() {} + : NVPTXGenInstrInfo(STI, RegInfo), RegInfo() {} void NVPTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index 910bc9d..aae3e49 100644 --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -2520,11 +2520,11 @@ bool PPCFrameLowering::spillCalleeSavedRegisters( // saved vector registers. 
if (Subtarget.needsSwapsForVSXMemOps() && !MF->getFunction().hasFnAttribute(Attribute::NoUnwind)) - TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn, - I.getFrameIdx(), RC, TRI); + TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn, I.getFrameIdx(), + RC); else TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, I.getFrameIdx(), RC, - TRI, Register()); + Register()); } } } @@ -2690,10 +2690,9 @@ bool PPCFrameLowering::restoreCalleeSavedRegisters( // saved vector registers. if (Subtarget.needsSwapsForVSXMemOps() && !MF->getFunction().hasFnAttribute(Attribute::NoUnwind)) - TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC, - TRI); + TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC); else - TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI, + TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, Register()); assert(I != MBB.begin() && diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 3014aa6..366a7b6 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -89,7 +89,7 @@ static cl::opt<bool> EnableFMARegPressureReduction( void PPCInstrInfo::anchor() {} PPCInstrInfo::PPCInstrInfo(const PPCSubtarget &STI) - : PPCGenInstrInfo(STI, PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP, + : PPCGenInstrInfo(STI, RI, PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP, /* CatchRetOpcode */ -1, STI.isPPC64() ? PPC::BLR8 : PPC::BLR), Subtarget(STI), RI(STI.getTargetMachine()) {} @@ -2014,8 +2014,7 @@ void PPCInstrInfo::StoreRegToStackSlot( void PPCInstrInfo::storeRegToStackSlotNoUpd( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, - bool isKill, int FrameIdx, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { + bool isKill, int FrameIdx, const TargetRegisterClass *RC) const { MachineFunction &MF = *MBB.getParent(); SmallVector<MachineInstr *, 4> NewMIs; @@ -2034,8 +2033,7 @@ void PPCInstrInfo::storeRegToStackSlotNoUpd( void PPCInstrInfo::storeRegToStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, - bool isKill, int FrameIdx, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, + bool isKill, int FrameIdx, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags) const { // We need to avoid a situation in which the value from a VRRC register is // spilled using an Altivec instruction and reloaded into a VSRC register @@ -2045,7 +2043,7 @@ void PPCInstrInfo::storeRegToStackSlot( // the register is defined using an Altivec instruction and is then used by a // VSX instruction. 
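The comment block above is the motivation for the RC = updatedRC(RC); call that follows it: promoting the Altivec class to the wider VSX class before choosing a spill opcode guarantees the store and the later reload come from the same instruction set. A toy model of that promotion; the enum and the mnemonics are only indicative, not the real register classes or a complete opcode table:

    #include <cstdio>

    enum class RC { GPR, VRRC, VSRC };

    // Analogue of PPCInstrInfo::updatedRC(): widen VRRC (Altivec) to VSRC (VSX).
    static RC updatedRC(RC C) { return C == RC::VRRC ? RC::VSRC : C; }

    static const char *spillOpcode(RC C) {
      switch (C) {
      case RC::GPR:
        return "STD";
      case RC::VRRC:
        return "STVX"; // Altivec store; element order differs from VSX
      case RC::VSRC:
        return "STXVD2X"; // VSX store
      }
      return "?";
    }

    int main() {
      // Spill and reload now agree on the VSX form even for a VRRC input:
      std::printf("%s\n", spillOpcode(updatedRC(RC::VRRC))); // STXVD2X
    }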
RC = updatedRC(RC); - storeRegToStackSlotNoUpd(MBB, MI, SrcReg, isKill, FrameIdx, RC, TRI); + storeRegToStackSlotNoUpd(MBB, MI, SrcReg, isKill, FrameIdx, RC); } void PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL, @@ -2060,8 +2058,7 @@ void PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL, void PPCInstrInfo::loadRegFromStackSlotNoUpd( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, - int FrameIdx, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { + int FrameIdx, const TargetRegisterClass *RC) const { MachineFunction &MF = *MBB.getParent(); SmallVector<MachineInstr*, 4> NewMIs; DebugLoc DL; @@ -2080,10 +2077,12 @@ void PPCInstrInfo::loadRegFromStackSlotNoUpd( NewMIs.back()->addMemOperand(MF, MMO); } -void PPCInstrInfo::loadRegFromStackSlot( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, - int FrameIdx, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, - Register VReg, MachineInstr::MIFlag Flags) const { +void PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + Register DestReg, int FrameIdx, + const TargetRegisterClass *RC, + Register VReg, + MachineInstr::MIFlag Flags) const { // We need to avoid a situation in which the value from a VRRC register is // spilled using an Altivec instruction and reloaded into a VSRC register // using a VSX instruction. The issue with this is that the VSX @@ -2093,7 +2092,7 @@ void PPCInstrInfo::loadRegFromStackSlot( // VSX instruction. RC = updatedRC(RC); - loadRegFromStackSlotNoUpd(MBB, MI, DestReg, FrameIdx, RC, TRI); + loadRegFromStackSlotNoUpd(MBB, MI, DestReg, FrameIdx, RC); } bool PPCInstrInfo:: diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h index d67fc28..8b824bc 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -570,7 +570,8 @@ public: void storeRegToStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, + + Register VReg, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; // Emits a register spill without updating the register class for vector @@ -579,13 +580,13 @@ public: void storeRegToStackSlotNoUpd(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; + const TargetRegisterClass *RC) const; void loadRegFromStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, + + Register VReg, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; // Emits a register reload without updating the register class for vector @@ -594,8 +595,7 @@ public: void loadRegFromStackSlotNoUpd(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; + const TargetRegisterClass *RC) const; unsigned getStoreOpcodeForSpill(const TargetRegisterClass *RC) const; diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp index 85b4072..b3a7c82 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ 
b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -2023,7 +2023,7 @@ Register PPCRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); const TargetRegisterClass *RC = getPointerRegClass(); Register BaseReg = MRI.createVirtualRegister(RC); - MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this)); + MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0)); BuildMI(*MBB, Ins, DL, MCID, BaseReg) .addFrameIndex(FrameIdx).addImm(Offset); @@ -2051,7 +2051,7 @@ void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg, const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); const MCInstrDesc &MCID = MI.getDesc(); MachineRegisterInfo &MRI = MF.getRegInfo(); - MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, FIOperandNum, this)); + MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, FIOperandNum)); } bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, diff --git a/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp b/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp index 51180f5..5d3d9b5 100644 --- a/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp +++ b/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp @@ -59,7 +59,6 @@ bool RISCVDeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) { return false; const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); LiveIntervals &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS(); LLVM_DEBUG(dbgs() << "***** RISCVDeadRegisterDefinitions *****\n"); @@ -89,7 +88,7 @@ bool RISCVDeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) { LLVM_DEBUG(dbgs() << " Dead def operand #" << I << " in:\n "; MI.print(dbgs())); Register X0Reg; - const TargetRegisterClass *RC = TII->getRegClass(Desc, I, TRI); + const TargetRegisterClass *RC = TII->getRegClass(Desc, I); if (RC && RC->contains(RISCV::X0)) { X0Reg = RISCV::X0; } else if (RC && RC->contains(RISCV::X0_W)) { diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index f881c4c..f7fc952 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -291,12 +291,12 @@ static void emitSiFiveCLICPreemptibleSaves(MachineFunction &MF, // which affects other passes. TII->storeRegToStackSlot(MBB, MBBI, RISCV::X8, /* IsKill=*/true, RVFI->getInterruptCSRFrameIndex(0), - &RISCV::GPRRegClass, STI.getRegisterInfo(), - Register(), MachineInstr::FrameSetup); + &RISCV::GPRRegClass, Register(), + MachineInstr::FrameSetup); TII->storeRegToStackSlot(MBB, MBBI, RISCV::X9, /* IsKill=*/true, RVFI->getInterruptCSRFrameIndex(1), - &RISCV::GPRRegClass, STI.getRegisterInfo(), - Register(), MachineInstr::FrameSetup); + &RISCV::GPRRegClass, Register(), + MachineInstr::FrameSetup); // Put `mcause` into X8 (s0), and `mepc` into X9 (s1). If either of these are // used in the function, then they will appear in `getUnmanagedCSI` and will @@ -357,14 +357,12 @@ static void emitSiFiveCLICPreemptibleRestores(MachineFunction &MF, // X8 and X9 need to be restored to their values on function entry, which we // saved onto the stack in `emitSiFiveCLICPreemptibleSaves`. 
- TII->loadRegFromStackSlot(MBB, MBBI, RISCV::X9, - RVFI->getInterruptCSRFrameIndex(1), - &RISCV::GPRRegClass, STI.getRegisterInfo(), - Register(), MachineInstr::FrameSetup); - TII->loadRegFromStackSlot(MBB, MBBI, RISCV::X8, - RVFI->getInterruptCSRFrameIndex(0), - &RISCV::GPRRegClass, STI.getRegisterInfo(), - Register(), MachineInstr::FrameSetup); + TII->loadRegFromStackSlot( + MBB, MBBI, RISCV::X9, RVFI->getInterruptCSRFrameIndex(1), + &RISCV::GPRRegClass, Register(), MachineInstr::FrameSetup); + TII->loadRegFromStackSlot( + MBB, MBBI, RISCV::X8, RVFI->getInterruptCSRFrameIndex(0), + &RISCV::GPRRegClass, Register(), MachineInstr::FrameSetup); } // Get the ID of the libcall used for spilling and restoring callee saved @@ -2177,7 +2175,7 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters( MCRegister Reg = CS.getReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg), - CS.getFrameIdx(), RC, TRI, Register(), + CS.getFrameIdx(), RC, Register(), MachineInstr::FrameSetup); } }; @@ -2267,8 +2265,8 @@ bool RISCVFrameLowering::restoreCalleeSavedRegisters( for (auto &CS : CSInfo) { MCRegister Reg = CS.getReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI, - Register(), MachineInstr::FrameDestroy); + TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, Register(), + MachineInstr::FrameDestroy); assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!"); } diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index a3ccbd8..4d86a36 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -22203,8 +22203,7 @@ static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI, MachineFunction &MF = *BB->getParent(); DebugLoc DL = MI.getDebugLoc(); - const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); - const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); + const RISCVInstrInfo &TII = *MF.getSubtarget<RISCVSubtarget>().getInstrInfo(); Register LoReg = MI.getOperand(0).getReg(); Register HiReg = MI.getOperand(1).getReg(); Register SrcReg = MI.getOperand(2).getReg(); @@ -22213,7 +22212,7 @@ static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI, int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC, - RI, Register()); + Register()); MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); MachineMemOperand *MMOLo = MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8)); @@ -22239,8 +22238,7 @@ static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI, MachineFunction &MF = *BB->getParent(); DebugLoc DL = MI.getDebugLoc(); - const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); - const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); + const RISCVInstrInfo &TII = *MF.getSubtarget<RISCVSubtarget>().getInstrInfo(); Register DstReg = MI.getOperand(0).getReg(); Register LoReg = MI.getOperand(1).getReg(); Register HiReg = MI.getOperand(2).getReg(); @@ -22263,7 +22261,7 @@ static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI, .addFrameIndex(FI) .addImm(4) .addMemOperand(MMOHi); - TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register()); + TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, Register()); 
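The two RISC-V pseudo expansions above have memory semantics that can be stated outside LLVM: SplitF64 and BuildPairF64 round-trip an f64 through a shared stack slot as two 32-bit words at offsets 0 and 4. A plain C++ model of the BuildPairF64 direction, assuming a little-endian host as RV32 is; the function name is invented:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Store the halves with two SW-like accesses, reload with one FLD-like one.
    static double buildPairF64(uint32_t Lo, uint32_t Hi) {
      unsigned char Slot[8];
      std::memcpy(Slot + 0, &Lo, 4); // SW lo, 0(slot)
      std::memcpy(Slot + 4, &Hi, 4); // SW hi, 4(slot)
      double D;
      std::memcpy(&D, Slot, 8); // FLD d, 0(slot)
      return D;
    }

    int main() {
      // 1.0 encodes as 0x3FF0000000000000: hi word 0x3FF00000, lo word 0.
      std::printf("%f\n", buildPairF64(0, 0x3FF00000u)); // prints 1.000000
    }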
MI.eraseFromParent(); // The pseudo instruction is gone now. return BB; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index b05956b..e0cdd11 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -82,8 +82,9 @@ namespace llvm::RISCV { } // end namespace llvm::RISCV RISCVInstrInfo::RISCVInstrInfo(const RISCVSubtarget &STI) - : RISCVGenInstrInfo(STI, RISCV::ADJCALLSTACKDOWN, RISCV::ADJCALLSTACKUP), - STI(STI) {} + : RISCVGenInstrInfo(STI, RegInfo, RISCV::ADJCALLSTACKDOWN, + RISCV::ADJCALLSTACKUP), + RegInfo(STI.getHwMode()), STI(STI) {} #define GET_INSTRINFO_HELPERS #include "RISCVGenInstrInfo.inc" @@ -638,7 +639,6 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register SrcReg, bool IsKill, int FI, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags) const { MachineFunction *MF = MBB.getParent(); @@ -646,8 +646,8 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, unsigned Opcode; if (RISCV::GPRRegClass.hasSubClassEq(RC)) { - Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ? - RISCV::SW : RISCV::SD; + Opcode = RegInfo.getRegSizeInBits(RISCV::GPRRegClass) == 32 ? RISCV::SW + : RISCV::SD; } else if (RISCV::GPRF16RegClass.hasSubClassEq(RC)) { Opcode = RISCV::SH_INX; } else if (RISCV::GPRF32RegClass.hasSubClassEq(RC)) { @@ -704,7 +704,7 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, .addFrameIndex(FI) .addMemOperand(MMO) .setMIFlag(Flags); - NumVRegSpilled += TRI->getRegSizeInBits(*RC) / RISCV::RVVBitsPerBlock; + NumVRegSpilled += RegInfo.getRegSizeInBits(*RC) / RISCV::RVVBitsPerBlock; } else { MachineMemOperand *MMO = MF->getMachineMemOperand( MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore, @@ -719,10 +719,12 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, } } -void RISCVInstrInfo::loadRegFromStackSlot( - MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DstReg, - int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, - Register VReg, MachineInstr::MIFlag Flags) const { +void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + Register DstReg, int FI, + const TargetRegisterClass *RC, + Register VReg, + MachineInstr::MIFlag Flags) const { MachineFunction *MF = MBB.getParent(); MachineFrameInfo &MFI = MF->getFrameInfo(); DebugLoc DL = @@ -730,8 +732,8 @@ void RISCVInstrInfo::loadRegFromStackSlot( unsigned Opcode; if (RISCV::GPRRegClass.hasSubClassEq(RC)) { - Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ? - RISCV::LW : RISCV::LD; + Opcode = RegInfo.getRegSizeInBits(RISCV::GPRRegClass) == 32 ? 
RISCV::LW + : RISCV::LD; } else if (RISCV::GPRF16RegClass.hasSubClassEq(RC)) { Opcode = RISCV::LH_INX; } else if (RISCV::GPRF32RegClass.hasSubClassEq(RC)) { @@ -787,7 +789,7 @@ void RISCVInstrInfo::loadRegFromStackSlot( .addFrameIndex(FI) .addMemOperand(MMO) .setMIFlag(Flags); - NumVRegReloaded += TRI->getRegSizeInBits(*RC) / RISCV::RVVBitsPerBlock; + NumVRegReloaded += RegInfo.getRegSizeInBits(*RC) / RISCV::RVVBitsPerBlock; } else { MachineMemOperand *MMO = MF->getMachineMemOperand( MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, @@ -1378,14 +1380,14 @@ void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, report_fatal_error("underestimated function size"); storeRegToStackSlot(MBB, MI, TmpGPR, /*IsKill=*/true, FrameIndex, - &RISCV::GPRRegClass, TRI, Register()); + &RISCV::GPRRegClass, Register()); TRI->eliminateFrameIndex(std::prev(MI.getIterator()), /*SpAdj=*/0, /*FIOperandNum=*/1); MI.getOperand(1).setMBB(&RestoreBB); loadRegFromStackSlot(RestoreBB, RestoreBB.end(), TmpGPR, FrameIndex, - &RISCV::GPRRegClass, TRI, Register()); + &RISCV::GPRRegClass, Register()); TRI->eliminateFrameIndex(RestoreBB.back(), /*SpAdj=*/0, /*FIOperandNum=*/1); } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h index c5eddb9..0ffe015 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -79,10 +79,13 @@ enum RISCVMachineCombinerPattern : unsigned { }; class RISCVInstrInfo : public RISCVGenInstrInfo { + const RISCVRegisterInfo RegInfo; public: explicit RISCVInstrInfo(const RISCVSubtarget &STI); + const RISCVRegisterInfo &getRegisterInfo() const { return RegInfo; } + MCInst getNop() const override; Register isLoadFromStackSlot(const MachineInstr &MI, @@ -113,13 +116,13 @@ public: void storeRegToStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool IsKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, + + Register VReg, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; void loadRegFromStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DstReg, - int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, + int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; using TargetInstrInfo::foldMemoryOperandImpl; diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp index 715ac4c..3b43be3 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp @@ -104,7 +104,7 @@ RISCVSubtarget::RISCVSubtarget(const Triple &TT, StringRef CPU, RVVVectorBitsMin(RVVVectorBitsMin), RVVVectorBitsMax(RVVVectorBitsMax), FrameLowering( initializeSubtargetDependencies(TT, CPU, TuneCPU, FS, ABIName)), - InstrInfo(*this), RegInfo(getHwMode()), TLInfo(TM, *this) { + InstrInfo(*this), TLInfo(TM, *this) { TSInfo = std::make_unique<RISCVSelectionDAGInfo>(); } diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index 4b4fc8f..4026364 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -112,7 +112,6 @@ private: RISCVFrameLowering FrameLowering; RISCVInstrInfo InstrInfo; - RISCVRegisterInfo RegInfo; RISCVTargetLowering TLInfo; /// Initializes using the passed in CPU and feature strings so that we can @@ -140,13 +139,14 @@ 
public: } const RISCVInstrInfo *getInstrInfo() const override { return &InstrInfo; } const RISCVRegisterInfo *getRegisterInfo() const override { - return &RegInfo; + return &InstrInfo.getRegisterInfo(); } const RISCVTargetLowering *getTargetLowering() const override { return &TLInfo; } bool enableMachineScheduler() const override { return true; } + bool enableTerminalRule() const override { return true; } bool enablePostRAScheduler() const override { return UsePostRAScheduler; } diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp index fdf9a4f..e1ff243 100644 --- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp +++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp @@ -455,7 +455,7 @@ bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) { True->getOperand(1).setReg(MI.getOperand(2).getReg()); // If True is masked then its passthru needs to be in VRNoV0. MRI->constrainRegClass(True->getOperand(1).getReg(), - TII->getRegClass(True->getDesc(), 1, TRI)); + TII->getRegClass(True->getDesc(), 1)); } MI.setDesc(TII->get(NewOpc)); @@ -675,7 +675,7 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) { if (Passthru.getReg().isValid()) MRI->constrainRegClass( Passthru.getReg(), - TII->getRegClass(Src->getDesc(), SrcPassthru.getOperandNo(), TRI)); + TII->getRegClass(Src->getDesc(), SrcPassthru.getOperandNo())); } if (RISCVII::hasVecPolicyOp(Src->getDesc().TSFlags)) { diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp index ba95ad8..4f8bf43 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp @@ -24,7 +24,7 @@ using namespace llvm; SPIRVInstrInfo::SPIRVInstrInfo(const SPIRVSubtarget &STI) - : SPIRVGenInstrInfo(STI) {} + : SPIRVGenInstrInfo(STI, RI) {} bool SPIRVInstrInfo::isConstantInstr(const MachineInstr &MI) const { switch (MI.getOpcode()) { diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp index fbb127d..b8cd9c1 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp @@ -249,17 +249,18 @@ static InstrSignature instrToSignature(const MachineInstr &MI, InstrSignature Signature{MI.getOpcode()}; for (unsigned i = 0; i < MI.getNumOperands(); ++i) { // The only decorations that can be applied more than once to a given <id> - // or structure member are UserSemantic(5635), CacheControlLoadINTEL (6442), - // and CacheControlStoreINTEL (6443). For all the rest of decorations, we - // will only add to the signature the Opcode, the id to which it applies, - // and the decoration id, disregarding any decoration flags. This will - // ensure that any subsequent decoration with the same id will be deemed as - // a duplicate. Then, at the call site, we will be able to handle duplicates - // in the best way. + // or structure member are FuncParamAttr (38), UserSemantic (5635), + // CacheControlLoadINTEL (6442), and CacheControlStoreINTEL (6443). For all + // the rest of decorations, we will only add to the signature the Opcode, + // the id to which it applies, and the decoration id, disregarding any + // decoration flags. This will ensure that any subsequent decoration with + // the same id will be deemed as a duplicate. Then, at the call site, we + // will be able to handle duplicates in the best way. 
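A compact model of the deduplication rule in the rewritten SPIRV comment above: most OpDecorate instructions are reduced to {opcode, target <id>, decoration id}, so a repeat of the same decoration kind on the same <id> collides, while the four repeatable decorations keep all their operands. The decoration numbers are the spec values quoted in the comment and 71 is the SPIR-V OpDecorate opcode; the containers and sample operands are illustrative, not the real InstrSignature machinery:

    #include <cstdint>
    #include <cstdio>
    #include <set>
    #include <vector>

    using Signature = std::vector<uint64_t>;

    static bool isRepeatable(uint64_t Dec) {
      return Dec == 38 /*FuncParamAttr*/ || Dec == 5635 /*UserSemantic*/ ||
             Dec == 6442 /*CacheControlLoadINTEL*/ ||
             Dec == 6443 /*CacheControlStoreINTEL*/;
    }

    static Signature signature(uint64_t Opcode, const std::vector<uint64_t> &Ops) {
      // Ops[0] is the decorated <id>, Ops[1] the decoration, the rest its flags.
      Signature S{Opcode, Ops[0], Ops[1]};
      if (isRepeatable(Ops[1]))
        S.insert(S.end(), Ops.begin() + 2, Ops.end()); // keep operands: no dedup
      return S;
    }

    int main() {
      std::set<Signature> Seen;
      // Two Alignment (decoration 44) entries with different values on <id> 7:
      Seen.insert(signature(/*OpDecorate*/ 71, {7, 44, 16}));
      bool Duplicate = !Seen.insert(signature(71, {7, 44, 32})).second;
      std::printf("second Alignment decoration deduplicated: %d\n", Duplicate); // 1
    }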
unsigned Opcode = MI.getOpcode(); if ((Opcode == SPIRV::OpDecorate) && i >= 2) { unsigned DecorationID = MI.getOperand(1).getImm(); - if (DecorationID != SPIRV::Decoration::UserSemantic && + if (DecorationID != SPIRV::Decoration::FuncParamAttr && + DecorationID != SPIRV::Decoration::UserSemantic && DecorationID != SPIRV::Decoration::CacheControlLoadINTEL && DecorationID != SPIRV::Decoration::CacheControlStoreINTEL) continue; diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/llvm/lib/Target/Sparc/SparcInstrInfo.cpp index f66eb9d..6596379 100644 --- a/llvm/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/llvm/lib/Target/Sparc/SparcInstrInfo.cpp @@ -38,8 +38,8 @@ static cl::opt<unsigned> void SparcInstrInfo::anchor() {} SparcInstrInfo::SparcInstrInfo(const SparcSubtarget &ST) - : SparcGenInstrInfo(ST, SP::ADJCALLSTACKDOWN, SP::ADJCALLSTACKUP), RI(ST), - Subtarget(ST) {} + : SparcGenInstrInfo(ST, RI, SP::ADJCALLSTACKDOWN, SP::ADJCALLSTACKUP), + RI(ST), Subtarget(ST) {} /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of @@ -527,7 +527,6 @@ void SparcInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register SrcReg, bool isKill, int FI, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags) const { DebugLoc DL; @@ -564,10 +563,12 @@ void SparcInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, llvm_unreachable("Can't store this register to stack slot"); } -void SparcInstrInfo::loadRegFromStackSlot( - MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg, - int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, - Register VReg, MachineInstr::MIFlag Flags) const { +void SparcInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + Register DestReg, int FI, + const TargetRegisterClass *RC, + Register VReg, + MachineInstr::MIFlag Flags) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.h b/llvm/lib/Target/Sparc/SparcInstrInfo.h index 01d0204..273888f 100644 --- a/llvm/lib/Target/Sparc/SparcInstrInfo.h +++ b/llvm/lib/Target/Sparc/SparcInstrInfo.h @@ -92,14 +92,13 @@ public: void storeRegToStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, - bool isKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, + bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; void loadRegFromStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, + Register VReg, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; Register getGlobalBaseReg(MachineFunction *MF) const; diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp index dcefff9..570bbd8 100644 --- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -360,12 +360,12 @@ bool SystemZELFFrameLowering::spillCalleeSavedRegisters( if (SystemZ::FP64BitRegClass.contains(Reg)) { MBB.addLiveIn(Reg); TII->storeRegToStackSlot(MBB, MBBI, Reg, true, I.getFrameIdx(), - &SystemZ::FP64BitRegClass, TRI, Register()); + &SystemZ::FP64BitRegClass, 
Register()); } if (SystemZ::VR128BitRegClass.contains(Reg)) { MBB.addLiveIn(Reg); TII->storeRegToStackSlot(MBB, MBBI, Reg, true, I.getFrameIdx(), - &SystemZ::VR128BitRegClass, TRI, Register()); + &SystemZ::VR128BitRegClass, Register()); } } @@ -389,10 +389,10 @@ bool SystemZELFFrameLowering::restoreCalleeSavedRegisters( MCRegister Reg = I.getReg(); if (SystemZ::FP64BitRegClass.contains(Reg)) TII->loadRegFromStackSlot(MBB, MBBI, Reg, I.getFrameIdx(), - &SystemZ::FP64BitRegClass, TRI, Register()); + &SystemZ::FP64BitRegClass, Register()); if (SystemZ::VR128BitRegClass.contains(Reg)) TII->loadRegFromStackSlot(MBB, MBBI, Reg, I.getFrameIdx(), - &SystemZ::VR128BitRegClass, TRI, Register()); + &SystemZ::VR128BitRegClass, Register()); } // Restore call-saved GPRs (but not call-clobbered varargs, which at @@ -1157,12 +1157,12 @@ bool SystemZXPLINKFrameLowering::spillCalleeSavedRegisters( if (SystemZ::FP64BitRegClass.contains(Reg)) { MBB.addLiveIn(Reg); TII->storeRegToStackSlot(MBB, MBBI, Reg, true, I.getFrameIdx(), - &SystemZ::FP64BitRegClass, TRI, Register()); + &SystemZ::FP64BitRegClass, Register()); } if (SystemZ::VR128BitRegClass.contains(Reg)) { MBB.addLiveIn(Reg); TII->storeRegToStackSlot(MBB, MBBI, Reg, true, I.getFrameIdx(), - &SystemZ::VR128BitRegClass, TRI, Register()); + &SystemZ::VR128BitRegClass, Register()); } } @@ -1189,10 +1189,10 @@ bool SystemZXPLINKFrameLowering::restoreCalleeSavedRegisters( MCRegister Reg = I.getReg(); if (SystemZ::FP64BitRegClass.contains(Reg)) TII->loadRegFromStackSlot(MBB, MBBI, Reg, I.getFrameIdx(), - &SystemZ::FP64BitRegClass, TRI, Register()); + &SystemZ::FP64BitRegClass, Register()); if (SystemZ::VR128BitRegClass.contains(Reg)) TII->loadRegFromStackSlot(MBB, MBBI, Reg, I.getFrameIdx(), - &SystemZ::VR128BitRegClass, TRI, Register()); + &SystemZ::VR128BitRegClass, Register()); } // Restore call-saved GPRs (but not call-clobbered varargs, which at diff --git a/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp b/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp index 5313fba..8fc339f 100644 --- a/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp +++ b/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp @@ -115,11 +115,10 @@ SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const { } bool SystemZHazardRecognizer::has4RegOps(const MachineInstr *MI) const { - const TargetRegisterInfo *TRI = &TII->getRegisterInfo(); const MCInstrDesc &MID = MI->getDesc(); unsigned Count = 0; for (unsigned OpIdx = 0; OpIdx < MID.getNumOperands(); OpIdx++) { - const TargetRegisterClass *RC = TII->getRegClass(MID, OpIdx, TRI); + const TargetRegisterClass *RC = TII->getRegClass(MID, OpIdx); if (RC == nullptr) continue; if (OpIdx >= MID.getNumDefs() && diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp index 2e21f27..eb1ce4a 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -60,7 +60,7 @@ static uint64_t allOnes(unsigned int Count) { void SystemZInstrInfo::anchor() {} SystemZInstrInfo::SystemZInstrInfo(const SystemZSubtarget &sti) - : SystemZGenInstrInfo(sti, -1, -1), + : SystemZGenInstrInfo(sti, RI, -1, -1), RI(sti.getSpecialRegisters()->getReturnFunctionAddressRegister(), sti.getHwMode()), STI(sti) {} @@ -1023,8 +1023,8 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, void SystemZInstrInfo::storeRegToStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIdx, const TargetRegisterClass *RC, 
- const TargetRegisterInfo *TRI, Register VReg, - MachineInstr::MIFlag Flags) const { + + Register VReg, MachineInstr::MIFlag Flags) const { DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); // Callers may expect a single instruction, so keep 128-bit moves @@ -1036,10 +1036,12 @@ void SystemZInstrInfo::storeRegToStackSlot( FrameIdx); } -void SystemZInstrInfo::loadRegFromStackSlot( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, - int FrameIdx, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, - Register VReg, MachineInstr::MIFlag Flags) const { +void SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + Register DestReg, int FrameIdx, + const TargetRegisterClass *RC, + Register VReg, + MachineInstr::MIFlag Flags) const { DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); // Callers may expect a single instruction, so keep 128-bit moves diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h index 7b9ad7b..4aecdd7 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h @@ -281,12 +281,14 @@ public: void storeRegToStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, + + Register VReg, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; void loadRegFromStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIdx, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, + + Register VReg, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override; diff --git a/llvm/lib/Target/VE/VEInstrInfo.cpp b/llvm/lib/Target/VE/VEInstrInfo.cpp index d5e804a..b9ac5d6 100644 --- a/llvm/lib/Target/VE/VEInstrInfo.cpp +++ b/llvm/lib/Target/VE/VEInstrInfo.cpp @@ -35,7 +35,7 @@ using namespace llvm; void VEInstrInfo::anchor() {} VEInstrInfo::VEInstrInfo(const VESubtarget &ST) - : VEGenInstrInfo(ST, VE::ADJCALLSTACKDOWN, VE::ADJCALLSTACKUP), RI() {} + : VEGenInstrInfo(ST, RI, VE::ADJCALLSTACKDOWN, VE::ADJCALLSTACKUP), RI() {} static bool IsIntegerCC(unsigned CC) { return (CC < VECC::CC_AF); } @@ -459,7 +459,6 @@ void VEInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register SrcReg, bool isKill, int FI, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags) const { DebugLoc DL; @@ -519,10 +518,12 @@ void VEInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, report_fatal_error("Can't store this register to stack slot"); } -void VEInstrInfo::loadRegFromStackSlot( - MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg, - int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, - Register VReg, MachineInstr::MIFlag Flags) const { +void VEInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + Register DestReg, int FI, + const TargetRegisterClass *RC, + Register VReg, + MachineInstr::MIFlag Flags) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); diff --git a/llvm/lib/Target/VE/VEInstrInfo.h b/llvm/lib/Target/VE/VEInstrInfo.h index 408d3ab..cedf7f2 100644 --- a/llvm/lib/Target/VE/VEInstrInfo.h +++ 
b/llvm/lib/Target/VE/VEInstrInfo.h @@ -92,13 +92,15 @@ public: void storeRegToStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, + + Register VReg, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; void loadRegFromStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, + + Register VReg, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; /// } Stack Spill & Reload diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp index 343d90e..8b4e4fb 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp @@ -34,7 +34,7 @@ using namespace llvm; #include "WebAssemblyGenInstrInfo.inc" WebAssemblyInstrInfo::WebAssemblyInstrInfo(const WebAssemblySubtarget &STI) - : WebAssemblyGenInstrInfo(STI, WebAssembly::ADJCALLSTACKDOWN, + : WebAssemblyGenInstrInfo(STI, RI, WebAssembly::ADJCALLSTACKDOWN, WebAssembly::ADJCALLSTACKUP, WebAssembly::CATCHRET), RI(STI.getTargetTriple()) {} diff --git a/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp b/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp index d2e3527..9473e8d 100644 --- a/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp +++ b/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp @@ -387,8 +387,8 @@ void X86AvoidSFBPass::buildCopy(MachineInstr *LoadInst, unsigned NLoadOpcode, MachineMemOperand *LMMO = *LoadInst->memoperands_begin(); MachineMemOperand *SMMO = *StoreInst->memoperands_begin(); - Register Reg1 = MRI->createVirtualRegister( - TII->getRegClass(TII->get(NLoadOpcode), 0, TRI)); + Register Reg1 = + MRI->createVirtualRegister(TII->getRegClass(TII->get(NLoadOpcode), 0)); MachineInstr *NewLoad = BuildMI(*MBB, LoadInst, LoadInst->getDebugLoc(), TII->get(NLoadOpcode), Reg1) @@ -553,7 +553,7 @@ void X86AvoidSFBPass::findPotentiallylBlockedCopies(MachineFunction &MF) { } unsigned X86AvoidSFBPass::getRegSizeInBytes(MachineInstr *LoadInst) { - const auto *TRC = TII->getRegClass(TII->get(LoadInst->getOpcode()), 0, TRI); + const auto *TRC = TII->getRegClass(TII->get(LoadInst->getOpcode()), 0); return TRI->getRegSizeInBits(*TRC) / 8; } diff --git a/llvm/lib/Target/X86/X86DomainReassignment.cpp b/llvm/lib/Target/X86/X86DomainReassignment.cpp index 5d19011..2047a53 100644 --- a/llvm/lib/Target/X86/X86DomainReassignment.cpp +++ b/llvm/lib/Target/X86/X86DomainReassignment.cpp @@ -174,8 +174,8 @@ public: MachineBasicBlock *MBB = MI->getParent(); const DebugLoc &DL = MI->getDebugLoc(); - Register Reg = MRI->createVirtualRegister( - TII->getRegClass(TII->get(DstOpcode), 0, MRI->getTargetRegisterInfo())); + Register Reg = + MRI->createVirtualRegister(TII->getRegClass(TII->get(DstOpcode), 0)); MachineInstrBuilder Bld = BuildMI(*MBB, MI, DL, TII->get(DstOpcode), Reg); for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) Bld.add(MO); diff --git a/llvm/lib/Target/X86/X86FastPreTileConfig.cpp b/llvm/lib/Target/X86/X86FastPreTileConfig.cpp index 06f729a..25799f4 100644 --- a/llvm/lib/Target/X86/X86FastPreTileConfig.cpp +++ b/llvm/lib/Target/X86/X86FastPreTileConfig.cpp @@ -206,8 +206,7 @@ void X86FastPreTileConfig::spill(MachineBasicBlock::iterator Before, const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); // 
Don't need shape information for tile store, because it is adjacent to // the tile def instruction. - TII->storeRegToStackSlot(*MBB, Before, VirtReg, Kill, FI, &RC, TRI, - Register()); + TII->storeRegToStackSlot(*MBB, Before, VirtReg, Kill, FI, &RC, Register()); ++NumStores; // TODO: update DBG_VALUEs diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index a66a321..8bca634 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -3093,8 +3093,8 @@ bool X86FrameLowering::spillCalleeSavedRegisters( MBB.addLiveIn(Reg); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); - TII.storeRegToStackSlot(MBB, MI, Reg, true, I.getFrameIdx(), RC, TRI, - Register(), MachineInstr::FrameSetup); + TII.storeRegToStackSlot(MBB, MI, Reg, true, I.getFrameIdx(), RC, Register(), + MachineInstr::FrameSetup); } return true; @@ -3166,8 +3166,7 @@ bool X86FrameLowering::restoreCalleeSavedRegisters( VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1; const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); - TII.loadRegFromStackSlot(MBB, MI, Reg, I.getFrameIdx(), RC, TRI, - Register()); + TII.loadRegFromStackSlot(MBB, MI, Reg, I.getFrameIdx(), RC, Register()); } // Clear the stack slot for spill base pointer register. diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 05a854a..5bce539 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -635,6 +635,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FROUNDEVEN, VT, Action); setOperationAction(ISD::FTRUNC, VT, Action); setOperationAction(ISD::FLDEXP, VT, Action); + setOperationAction(ISD::FSINCOSPI, VT, Action); }; if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) { diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 6b2a7a4..61d9608 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -85,7 +85,7 @@ static cl::opt<unsigned> UndefRegClearance( void X86InstrInfo::anchor() {} X86InstrInfo::X86InstrInfo(const X86Subtarget &STI) - : X86GenInstrInfo(STI, + : X86GenInstrInfo(STI, RI, (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKDOWN64 : X86::ADJCALLSTACKDOWN32), (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKUP64 @@ -93,10 +93,9 @@ X86InstrInfo::X86InstrInfo(const X86Subtarget &STI) X86::CATCHRET, (STI.is64Bit() ? X86::RET64 : X86::RET32)), Subtarget(STI), RI(STI.getTargetTriple()) {} -const TargetRegisterClass * -X86InstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum, - const TargetRegisterInfo *TRI) const { - auto *RC = TargetInstrInfo::getRegClass(MCID, OpNum, TRI); +const TargetRegisterClass *X86InstrInfo::getRegClass(const MCInstrDesc &MCID, + unsigned OpNum) const { + auto *RC = TargetInstrInfo::getRegClass(MCID, OpNum); // If the target does not have egpr, then r16-r31 will be reserved for all // instructions. 
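That comment is the entire rationale for the X86 override: when the subtarget lacks APX extended GPRs, the generically computed class is narrowed so r16-r31 are never allocation candidates. A schematic version, with invented stand-ins for MCInstrDesc and TargetRegisterClass (in the real backend the narrowed classes are the *_NOREX2 variants):

    #include <cstdio>

    struct RegClass {
      const char *Name;
      unsigned NumRegs;
    };

    static const RegClass GR64 = {"GR64", 32};               // includes r16-r31
    static const RegClass GR64_NOREX2 = {"GR64_NOREX2", 16}; // legacy GPRs only

    static const RegClass *getRegClass(bool HasEGPR, const RegClass *Generic) {
      if (!Generic || HasEGPR)
        return Generic;      // feature present: keep the generic (wide) answer
      return &GR64_NOREX2;   // feature absent: constrain away r16-r31
    }

    int main() {
      const RegClass *C = getRegClass(/*HasEGPR=*/false, &GR64);
      std::printf("%s: %u allocatable registers\n", C->Name, C->NumRegs);
    }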
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 6b2a7a4..61d9608 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -85,7 +85,7 @@ static cl::opt<unsigned> UndefRegClearance(
 void X86InstrInfo::anchor() {}
 
 X86InstrInfo::X86InstrInfo(const X86Subtarget &STI)
-    : X86GenInstrInfo(STI,
+    : X86GenInstrInfo(STI, RI,
                       (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKDOWN64
                                                : X86::ADJCALLSTACKDOWN32),
                       (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKUP64
@@ -93,10 +93,9 @@ X86InstrInfo::X86InstrInfo(const X86Subtarget &STI)
                       X86::CATCHRET, (STI.is64Bit() ? X86::RET64 : X86::RET32)),
       Subtarget(STI), RI(STI.getTargetTriple()) {}
 
-const TargetRegisterClass *
-X86InstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
-                          const TargetRegisterInfo *TRI) const {
-  auto *RC = TargetInstrInfo::getRegClass(MCID, OpNum, TRI);
+const TargetRegisterClass *X86InstrInfo::getRegClass(const MCInstrDesc &MCID,
+                                                     unsigned OpNum) const {
+  auto *RC = TargetInstrInfo::getRegClass(MCID, OpNum);
   // If the target does not have egpr, then r16-r31 will be reserved for all
   // instructions.
   if (!RC || !Subtarget.hasEGPR())
@@ -958,8 +957,7 @@ bool X86InstrInfo::isReMaterializableImpl(
 void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator I,
                                  Register DestReg, unsigned SubIdx,
-                                 const MachineInstr &Orig,
-                                 const TargetRegisterInfo &TRI) const {
+                                 const MachineInstr &Orig) const {
   bool ClobbersEFLAGS = Orig.modifiesRegister(X86::EFLAGS, &TRI);
   if (ClobbersEFLAGS && MBB.computeRegisterLiveness(&TRI, X86::EFLAGS, I) !=
                             MachineBasicBlock::LQR_Dead) {
@@ -4782,14 +4780,14 @@ void X86InstrInfo::loadStoreTileReg(MachineBasicBlock &MBB,
 void X86InstrInfo::storeRegToStackSlot(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg,
     bool isKill, int FrameIdx, const TargetRegisterClass *RC,
-    const TargetRegisterInfo *TRI, Register VReg,
-    MachineInstr::MIFlag Flags) const {
+
+    Register VReg, MachineInstr::MIFlag Flags) const {
   const MachineFunction &MF = *MBB.getParent();
   const MachineFrameInfo &MFI = MF.getFrameInfo();
-  assert(MFI.getObjectSize(FrameIdx) >= TRI->getSpillSize(*RC) &&
+  assert(MFI.getObjectSize(FrameIdx) >= RI.getSpillSize(*RC) &&
          "Stack slot too small for store");
-  unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16);
+  unsigned Alignment = std::max<uint32_t>(RI.getSpillSize(*RC), 16);
   bool isAligned =
       (Subtarget.getFrameLowering()->getStackAlign() >= Alignment) ||
       (RI.canRealignStack(MF) && !MFI.isFixedObjectIndex(FrameIdx));
@@ -4803,15 +4801,17 @@ void X86InstrInfo::storeRegToStackSlot(
       .setMIFlag(Flags);
 }
 
-void X86InstrInfo::loadRegFromStackSlot(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg,
-    int FrameIdx, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
-    Register VReg, MachineInstr::MIFlag Flags) const {
+void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                        MachineBasicBlock::iterator MI,
+                                        Register DestReg, int FrameIdx,
+                                        const TargetRegisterClass *RC,
+                                        Register VReg,
+                                        MachineInstr::MIFlag Flags) const {
   const MachineFunction &MF = *MBB.getParent();
   const MachineFrameInfo &MFI = MF.getFrameInfo();
-  assert(MFI.getObjectSize(FrameIdx) >= TRI->getSpillSize(*RC) &&
+  assert(MFI.getObjectSize(FrameIdx) >= RI.getSpillSize(*RC) &&
          "Load size exceeds stack slot");
-  unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16);
+  unsigned Alignment = std::max<uint32_t>(RI.getSpillSize(*RC), 16);
   bool isAligned =
       (Subtarget.getFrameLowering()->getStackAlign() >= Alignment) ||
       (RI.canRealignStack(MF) && !MFI.isFixedObjectIndex(FrameIdx));
@@ -5553,7 +5553,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
       return false;
     ShouldUpdateCC = true;
   } else if (ImmDelta != 0) {
-    unsigned BitWidth = TRI->getRegSizeInBits(*MRI->getRegClass(SrcReg));
+    unsigned BitWidth = RI.getRegSizeInBits(*MRI->getRegClass(SrcReg));
     // Shift amount for min/max constants to adjust for 8/16/32 instruction
     // sizes.
     switch (OldCC) {
@@ -7235,7 +7235,6 @@ static void updateOperandRegConstraints(MachineFunction &MF,
                                         MachineInstr &NewMI,
                                         const TargetInstrInfo &TII) {
   MachineRegisterInfo &MRI = MF.getRegInfo();
-  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
 
   for (int Idx : llvm::seq<int>(0, NewMI.getNumOperands())) {
     MachineOperand &MO = NewMI.getOperand(Idx);
@@ -7247,7 +7246,7 @@ static void updateOperandRegConstraints(MachineFunction &MF,
       continue;
 
     auto *NewRC =
-        MRI.constrainRegClass(Reg, TII.getRegClass(NewMI.getDesc(), Idx, &TRI));
+        MRI.constrainRegClass(Reg, TII.getRegClass(NewMI.getDesc(), Idx));
     if (!NewRC) {
       LLVM_DEBUG(
           dbgs() << "WARNING: Unable to update register constraint for operand "
@@ -7345,7 +7344,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
       unsigned SrcIdx = (Imm >> 6) & 3;
 
       const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
-      const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI);
+      const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum);
       unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
       if ((Size == 0 || Size >= 16) && RCSize >= 16 &&
           (MI.getOpcode() != X86::INSERTPSrri || Alignment >= Align(4))) {
@@ -7370,7 +7369,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
     // TODO: In most cases AVX doesn't have a 8-byte alignment requirement.
     if (OpNum == 2) {
       const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
-      const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI);
+      const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum);
       unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
       if ((Size == 0 || Size >= 16) && RCSize >= 16 && Alignment >= Align(8)) {
         unsigned NewOpCode =
@@ -7389,7 +7388,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
     // table twice.
     if (OpNum == 2) {
       const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
-      const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI);
+      const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum);
       unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
       if ((Size == 0 || Size >= 16) && RCSize >= 16 && Alignment < Align(16)) {
         MachineInstr *NewMI =
@@ -7524,7 +7523,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
   bool NarrowToMOV32rm = false;
   if (Size) {
     const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
-    const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI);
+    const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum);
     unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
     // Check if it's safe to fold the load. If the size of the object is
     // narrower than the load width, then it's not.
@@ -8118,9 +8117,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
            RC == &X86::VK32WMRegClass || RC == &X86::VK64WMRegClass;
   };
 
-  if (Op1.isReg() && IsVKWMClass(getRegClass(MCID, 1, &RI)))
+  if (Op1.isReg() && IsVKWMClass(getRegClass(MCID, 1)))
     MaskReg = Op1.getReg();
-  else if (Op2.isReg() && IsVKWMClass(getRegClass(MCID, 2, &RI)))
+  else if (Op2.isReg() && IsVKWMClass(getRegClass(MCID, 2)))
     MaskReg = Op2.getReg();
 
   if (MaskReg) {
@@ -8524,7 +8523,7 @@ bool X86InstrInfo::unfoldMemoryOperand(
   const MCInstrDesc &MCID = get(Opc);
 
-  const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI);
+  const TargetRegisterClass *RC = getRegClass(MCID, Index);
   const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
   // TODO: Check if 32-byte or greater accesses are slow too?
   if (!MI.hasOneMemOperand() && RC == &X86::VR128RegClass &&
@@ -8635,7 +8634,7 @@ bool X86InstrInfo::unfoldMemoryOperand(
 
   // Emit the store instruction.
   if (UnfoldStore) {
-    const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI);
+    const TargetRegisterClass *DstRC = getRegClass(MCID, 0);
     auto MMOs = extractStoreMMOs(MI.memoperands(), MF);
     unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*DstRC), 16);
     bool isAligned = !MMOs.empty() && MMOs.front()->getAlign() >= Alignment;
@@ -8667,7 +8666,7 @@ bool X86InstrInfo::unfoldMemoryOperand(
   const MCInstrDesc &MCID = get(Opc);
   MachineFunction &MF = DAG.getMachineFunction();
   const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
-  const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI);
+  const TargetRegisterClass *RC = getRegClass(MCID, Index);
   unsigned NumDefs = MCID.NumDefs;
   std::vector<SDValue> AddrOps;
   std::vector<SDValue> BeforeOps;
@@ -8718,7 +8717,7 @@ bool X86InstrInfo::unfoldMemoryOperand(
   std::vector<EVT> VTs;
   const TargetRegisterClass *DstRC = nullptr;
   if (MCID.getNumDefs() > 0) {
-    DstRC = getRegClass(MCID, 0, &RI);
+    DstRC = getRegClass(MCID, 0);
     VTs.push_back(*TRI.legalclasstypes_begin(*DstRC));
   }
   for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index 5f75559..a547fcd 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -246,9 +246,8 @@ public:
   ///   GR*RegClass (definition in TD file)
   ///   ->
   ///   GR*_NOREX2RegClass (Returned register class)
-  const TargetRegisterClass *
-  getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
-              const TargetRegisterInfo *TRI) const override;
+  const TargetRegisterClass *getRegClass(const MCInstrDesc &MCID,
+                                         unsigned OpNum) const override;
 
   /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
   /// such, whenever a client has an instance of instruction info, it should
@@ -343,8 +342,7 @@ public:
   bool isReMaterializableImpl(const MachineInstr &MI) const override;
   void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                      Register DestReg, unsigned SubIdx,
-                     const MachineInstr &Orig,
-                     const TargetRegisterInfo &TRI) const override;
+                     const MachineInstr &Orig) const override;
 
   /// Given an operand within a MachineInstr, insert preceding code to put it
   /// into the right format for a particular kind of LEA instruction. This may
@@ -469,14 +467,14 @@ public:
                    bool RenamableSrc = false) const override;
   void storeRegToStackSlot(
       MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg,
-      bool isKill, int FrameIndex, const TargetRegisterClass *RC,
-      const TargetRegisterInfo *TRI, Register VReg,
+      bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg,
       MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
   void loadRegFromStackSlot(
       MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg,
       int FrameIndex, const TargetRegisterClass *RC,
-      const TargetRegisterInfo *TRI, Register VReg,
+
+      Register VReg,
       MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
 
   void loadStoreTileReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
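Call sites of the spill hooks change in the same mechanical way as the X86InstrInfo.h declarations above: the TargetRegisterInfo pointer between the register-class and VReg arguments is simply dropped. A hedged sketch of the post-change call, wrapped in an illustrative helper (spillToSlot is not from the patch; MBB, MI, Reg, FI and RC stand for values frame lowering already has):

```cpp
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
using namespace llvm;

// Before this patch the call carried a TargetRegisterInfo* between RC and
// the VReg argument:
//   TII.storeRegToStackSlot(MBB, MI, Reg, /*isKill=*/true, FI, RC, TRI,
//                           Register());
static void spillToSlot(const TargetInstrInfo &TII, MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator MI, Register Reg, int FI,
                        const TargetRegisterClass *RC) {
  // Register() passes "no virtual register" for the VReg argument.
  TII.storeRegToStackSlot(MBB, MI, Reg, /*isKill=*/true, FI, RC, Register());
}
```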
diff --git a/llvm/lib/Target/X86/X86OptimizeLEAs.cpp b/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
index 167bed1..c964605 100644
--- a/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
+++ b/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
@@ -359,7 +359,7 @@ bool X86OptimizeLEAPass::chooseBestLEA(
     // example MOV8mr_NOREX. We could constrain the register class of the LEA
     // def to suit MI, however since this case is very rare and hard to
     // reproduce in a test it's just more reliable to skip the LEA.
-    if (TII->getRegClass(Desc, MemOpNo + X86::AddrBaseReg, TRI) !=
+    if (TII->getRegClass(Desc, MemOpNo + X86::AddrBaseReg) !=
         MRI->getRegClass(DefMI->getOperand(0).getReg()))
       continue;
diff --git a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
index e0b3b61..d0d897e 100644
--- a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
+++ b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
@@ -841,7 +841,7 @@ getRegClassForUnfoldedLoad(const X86InstrInfo &TII, unsigned Opcode) {
   unsigned UnfoldedOpc = TII.getOpcodeAfterMemoryUnfold(
       Opcode, /*UnfoldLoad*/ true, /*UnfoldStore*/ false, &Index);
   const MCInstrDesc &MCID = TII.get(UnfoldedOpc);
-  return TII.getRegClass(MCID, Index, &TII.getRegisterInfo());
+  return TII.getRegClass(MCID, Index);
 }
 
 void X86SpeculativeLoadHardeningPass::unfoldCallAndJumpLoads(
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index 868f413..4f5aadc 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -419,6 +419,8 @@ public:
   /// Enable the MachineScheduler pass for all X86 subtargets.
   bool enableMachineScheduler() const override { return true; }
 
+  bool enableTerminalRule() const override { return true; }
+
   bool enableEarlyIfConversion() const override;
 
   void getPostRAMutations(std::vector<std::unique_ptr<ScheduleDAGMutation>>
diff --git a/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
index 096ad08..0e00db49 100644
--- a/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
+++ b/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
@@ -69,7 +69,7 @@ static bool readInstruction32(ArrayRef<uint8_t> Bytes, uint64_t Address,
   return true;
 }
 
-static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo) {
+static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo) {
   const MCRegisterInfo *RegInfo = D->getContext().getRegisterInfo();
   return RegInfo->getRegClass(RC).getRegister(RegNo);
 }
@@ -79,7 +79,7 @@ static DecodeStatus DecodeGRRegsRegisterClass(MCInst &Inst, unsigned RegNo,
                                               const MCDisassembler *Decoder) {
   if (RegNo > 11)
     return MCDisassembler::Fail;
-  unsigned Reg = getReg(Decoder, XCore::GRRegsRegClassID, RegNo);
+  MCRegister Reg = getReg(Decoder, XCore::GRRegsRegClassID, RegNo);
   Inst.addOperand(MCOperand::createReg(Reg));
   return MCDisassembler::Success;
 }
@@ -89,7 +89,7 @@ static DecodeStatus DecodeRRegsRegisterClass(MCInst &Inst, unsigned RegNo,
                                              const MCDisassembler *Decoder) {
   if (RegNo > 15)
     return MCDisassembler::Fail;
-  unsigned Reg = getReg(Decoder, XCore::RRegsRegClassID, RegNo);
+  MCRegister Reg = getReg(Decoder, XCore::RRegsRegClassID, RegNo);
   Inst.addOperand(MCOperand::createReg(Reg));
   return MCDisassembler::Success;
 }
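The XCore disassembler hunks belong to the wider unsigned-to-MCRegister cleanup: getReg and the decode helpers now traffic in a typed register value rather than a raw encoding. A small sketch of what the wrapper type buys; hasTarget is an illustrative helper, not code from this patch:

```cpp
#include "llvm/MC/MCRegister.h"
using namespace llvm;

// MCRegister wraps the raw register id but makes "no register" explicit:
// a default-constructed MCRegister is invalid, and isValid() replaces the
// bare "Reg != 0" idiom that raw unsigned ids relied on.
static bool hasTarget(MCRegister Reg) { return Reg.isValid(); }
```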
diff --git a/llvm/lib/Target/XCore/XCoreFrameLowering.cpp b/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
index cdb5186..351a221 100644
--- a/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -432,7 +432,7 @@ bool XCoreFrameLowering::spillCalleeSavedRegisters(
     // Add the callee-saved register as live-in. It's killed at the spill.
     MBB.addLiveIn(Reg);
     const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-    TII.storeRegToStackSlot(MBB, MI, Reg, true, I.getFrameIdx(), RC, TRI,
+    TII.storeRegToStackSlot(MBB, MI, Reg, true, I.getFrameIdx(), RC,
                             Register());
     if (emitFrameMoves) {
       auto Store = MI;
@@ -458,8 +458,7 @@ bool XCoreFrameLowering::restoreCalleeSavedRegisters(
            "LR & FP are always handled in emitEpilogue");
 
     const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-    TII.loadRegFromStackSlot(MBB, MI, Reg, CSR.getFrameIdx(), RC, TRI,
-                             Register());
+    TII.loadRegFromStackSlot(MBB, MI, Reg, CSR.getFrameIdx(), RC, Register());
     assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!");
 
     // Insert in reverse order. loadRegFromStackSlot can insert multiple
diff --git a/llvm/lib/Target/XCore/XCoreInstrInfo.cpp b/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
index 1a9133a..075910c 100644
--- a/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -43,7 +43,7 @@ namespace XCore {
 void XCoreInstrInfo::anchor() {}
 
 XCoreInstrInfo::XCoreInstrInfo(const XCoreSubtarget &ST)
-    : XCoreGenInstrInfo(ST, XCore::ADJCALLSTACKDOWN, XCore::ADJCALLSTACKUP),
+    : XCoreGenInstrInfo(ST, RI, XCore::ADJCALLSTACKDOWN, XCore::ADJCALLSTACKUP),
       RI() {}
 
 static bool isZeroImm(const MachineOperand &op) {
@@ -355,8 +355,8 @@ void XCoreInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
 void XCoreInstrInfo::storeRegToStackSlot(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register SrcReg,
     bool isKill, int FrameIndex, const TargetRegisterClass *RC,
-    const TargetRegisterInfo *TRI, Register VReg,
-    MachineInstr::MIFlag Flags) const {
+
+    Register VReg, MachineInstr::MIFlag Flags) const {
   DebugLoc DL;
   if (I != MBB.end() && !I->isDebugInstr())
     DL = I->getDebugLoc();
@@ -377,7 +377,6 @@ void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator I,
                                           Register DestReg, int FrameIndex,
                                           const TargetRegisterClass *RC,
-                                          const TargetRegisterInfo *TRI,
                                           Register VReg,
                                           MachineInstr::MIFlag Flags) const {
   DebugLoc DL;
diff --git a/llvm/lib/Target/XCore/XCoreInstrInfo.h b/llvm/lib/Target/XCore/XCoreInstrInfo.h
index 3543392..c4e399e 100644
--- a/llvm/lib/Target/XCore/XCoreInstrInfo.h
+++ b/llvm/lib/Target/XCore/XCoreInstrInfo.h
@@ -71,13 +71,15 @@ public:
   void storeRegToStackSlot(
       MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg,
       bool isKill, int FrameIndex, const TargetRegisterClass *RC,
-      const TargetRegisterInfo *TRI, Register VReg,
+
+      Register VReg,
       MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
 
   void loadRegFromStackSlot(
       MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg,
       int FrameIndex, const TargetRegisterClass *RC,
-      const TargetRegisterInfo *TRI, Register VReg,
+
+      Register VReg,
       MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
 
   bool reverseBranchCondition(
diff --git a/llvm/lib/Target/Xtensa/XtensaFrameLowering.cpp b/llvm/lib/Target/Xtensa/XtensaFrameLowering.cpp
index cf9a2a0..1c0dc66 100644
--- a/llvm/lib/Target/Xtensa/XtensaFrameLowering.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaFrameLowering.cpp
@@ -314,7 +314,7 @@ bool XtensaFrameLowering::spillCalleeSavedRegisters(
     bool IsKill = !IsA0AndRetAddrIsTaken;
     const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
     TII.storeRegToStackSlot(EntryBlock, MI, Reg, IsKill, CSI[i].getFrameIdx(),
-                            RC, TRI, Register());
+                            RC, Register());
   }
 
   return true;
diff --git a/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp b/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp
index be69cef..d7b05ac 100644
--- a/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp
@@ -48,7 +48,8 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI) {
 }
 
 XtensaInstrInfo::XtensaInstrInfo(const XtensaSubtarget &STI)
-    : XtensaGenInstrInfo(STI, Xtensa::ADJCALLSTACKDOWN, Xtensa::ADJCALLSTACKUP),
+    : XtensaGenInstrInfo(STI, RI, Xtensa::ADJCALLSTACKDOWN,
+                         Xtensa::ADJCALLSTACKUP),
       RI(STI), STI(STI) {}
 
 Register XtensaInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
@@ -144,8 +145,8 @@ void XtensaInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
 void XtensaInstrInfo::storeRegToStackSlot(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
     bool isKill, int FrameIdx, const TargetRegisterClass *RC,
-    const TargetRegisterInfo *TRI, Register VReg,
-    MachineInstr::MIFlag Flags) const {
+
+    Register VReg, MachineInstr::MIFlag Flags) const {
   DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
   unsigned LoadOpcode, StoreOpcode;
   getLoadStoreOpcodes(RC, LoadOpcode, StoreOpcode, FrameIdx);
@@ -154,10 +155,12 @@ void XtensaInstrInfo::storeRegToStackSlot(
   addFrameReference(MIB, FrameIdx);
 }
 
-void XtensaInstrInfo::loadRegFromStackSlot(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg,
-    int FrameIdx, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
-    Register VReg, MachineInstr::MIFlag Flags) const {
+void XtensaInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                           MachineBasicBlock::iterator MBBI,
+                                           Register DestReg, int FrameIdx,
+                                           const TargetRegisterClass *RC,
+                                           Register VReg,
+                                           MachineInstr::MIFlag Flags) const {
  DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
   unsigned LoadOpcode, StoreOpcode;
   getLoadStoreOpcodes(RC, LoadOpcode, StoreOpcode, FrameIdx);
@@ -543,12 +546,12 @@ void XtensaInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
          "function code size is significantly larger than estimated");
 
   storeRegToStackSlot(MBB, L32R, ScavRegister, /*IsKill=*/true, FrameIndex,
-                      &Xtensa::ARRegClass, &RI, Register());
+                      &Xtensa::ARRegClass, Register());
   RI.eliminateFrameIndex(std::prev(L32R.getIterator()),
                          /*SpAdj=*/0, /*FIOperandNum=*/1);
 
   loadRegFromStackSlot(RestoreBB, RestoreBB.end(), ScavRegister, FrameIndex,
-                       &Xtensa::ARRegClass, &RI, Register());
+                       &Xtensa::ARRegClass, Register());
   RI.eliminateFrameIndex(RestoreBB.back(),
                          /*SpAdj=*/0, /*FIOperandNum=*/1);
   JumpToMBB = &RestoreBB;
diff --git a/llvm/lib/Target/Xtensa/XtensaInstrInfo.h b/llvm/lib/Target/Xtensa/XtensaInstrInfo.h
index 1808cb3..0b46d6c 100644
--- a/llvm/lib/Target/Xtensa/XtensaInstrInfo.h
+++ b/llvm/lib/Target/Xtensa/XtensaInstrInfo.h
@@ -56,14 +56,13 @@ public:
   void storeRegToStackSlot(
       MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
-      bool isKill, int FrameIndex, const TargetRegisterClass *RC,
-      const TargetRegisterInfo *TRI, Register VReg,
+      bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg,
       MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
 
   void loadRegFromStackSlot(
       MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
       Register DestReg, int FrameIdx, const TargetRegisterClass *RC,
-      const TargetRegisterInfo *TRI, Register VReg,
+      Register VReg,
       MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
 
   // Get the load and store opcodes for a given register class and offset.
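Every target constructor touched in this stretch (WebAssembly, X86, XCore, Xtensa) gains the same RI argument: the generated *GenInstrInfo base now receives the target's register info up front, which is what lets hooks such as storeRegToStackSlot, loadRegFromStackSlot and getRegClass shed their per-call TRI parameters. A sketch of the shape for a hypothetical target Foo, mirroring the Xtensa hunk above (all Foo names are placeholders):

```cpp
// Hypothetical target "Foo": the member register info RI is handed to the
// generated base class alongside the subtarget, so the base can answer
// register-info queries without every hook taking a TRI argument. Passing
// the not-yet-initialized member is fine because the base only binds a
// reference to it.
FooInstrInfo::FooInstrInfo(const FooSubtarget &STI)
    : FooGenInstrInfo(STI, RI, Foo::ADJCALLSTACKDOWN, Foo::ADJCALLSTACKUP),
      RI(STI) {}
```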
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index b5548d4..8c8d16a6 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -1944,6 +1944,10 @@ void InstrLowerer::emitNameData() {
   NamesVar = new GlobalVariable(M, NamesVal->getType(), true,
                                 GlobalValue::PrivateLinkage, NamesVal,
                                 getInstrProfNamesVarName());
+  if (isGPUProfTarget(M)) {
+    NamesVar->setLinkage(GlobalValue::ExternalLinkage);
+    NamesVar->setVisibility(GlobalValue::ProtectedVisibility);
+  }
   NamesSize = CompressedNameStr.size();
   setGlobalVariableLargeSection(TT, *NamesVar);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 5e4303a..90696ff 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -99,20 +99,20 @@ VPValue::VPValue(const unsigned char SC, Value *UV, VPDef *Def)
 VPValue::~VPValue() {
   assert(Users.empty() && "trying to delete a VPValue with remaining users");
-  if (Def)
+  if (VPDef *Def = getDefiningRecipe())
     Def->removeDefinedValue(this);
 }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void VPValue::print(raw_ostream &OS, VPSlotTracker &SlotTracker) const {
-  if (const VPRecipeBase *R = dyn_cast_or_null<VPRecipeBase>(Def))
+  if (const VPRecipeBase *R = getDefiningRecipe())
     R->print(OS, "", SlotTracker);
   else
     printAsOperand(OS, SlotTracker);
 }
 
 void VPValue::dump() const {
-  const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this->Def);
+  const VPRecipeBase *Instr = getDefiningRecipe();
   VPSlotTracker SlotTracker(
       (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
   print(dbgs(), SlotTracker);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 5851b3a..72858e1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -959,6 +959,11 @@ public:
   /// Add metadata with kind \p Kind and \p Node.
   void addMetadata(unsigned Kind, MDNode *Node) {
+    assert(none_of(Metadata,
+                   [Kind](const std::pair<unsigned, MDNode *> &P) {
+                     return P.first == Kind;
+                   }) &&
+           "Kind must appear at most once in Metadata");
     Metadata.emplace_back(Kind, Node);
   }
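The three VPlan.cpp hunks above replace direct pokes at the raw Def pointer with the getDefiningRecipe accessor, so the recipe cast lives in one place, while the VPlan.h hunk turns the implicit one-entry-per-kind metadata contract into an assert. A condensed sketch of the consumer-side pattern, with V standing for any VPValue in scope (this restates the print hunk's logic rather than adding new behavior):

```cpp
// getDefiningRecipe() returns the defining VPRecipeBase when the value is
// produced by a recipe, or nullptr for live-ins, replacing the scattered
// dyn_cast_or_null<VPRecipeBase>(Def) checks.
if (const VPRecipeBase *R = V->getDefiningRecipe())
  R->print(OS, "", SlotTracker); // defined by a recipe: print the recipe
else
  V->printAsOperand(OS, SlotTracker); // live-in: print as a plain operand
```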
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index b319fbc7..eab6426 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1420,10 +1420,26 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
     // broadcasts.
     if (!vputils::isSingleScalar(RepOrWidenR) ||
         !all_of(RepOrWidenR->users(), [RepOrWidenR](const VPUser *U) {
-          return U->usesScalars(RepOrWidenR) ||
-                 match(cast<VPRecipeBase>(U),
-                       m_CombineOr(m_ExtractLastElement(m_VPValue()),
-                                   m_ExtractLastLanePerPart(m_VPValue())));
+          if (auto *Store = dyn_cast<VPWidenStoreRecipe>(U)) {
+            // VPWidenStore doesn't have users, and stores are always
+            // profitable to widen: hence, permitting single-scalar stored
+            // values is an important leaf condition. The assert must hold as
+            // we checked the RepOrWidenR operand against
+            // vputils::isSingleScalar.
+            assert(RepOrWidenR == Store->getAddr() ||
+                   vputils::isSingleScalar(Store->getStoredValue()));
+            return true;
+          }
+
+          if (auto *VPI = dyn_cast<VPInstruction>(U)) {
+            unsigned Opcode = VPI->getOpcode();
+            if (Opcode == VPInstruction::ExtractLastElement ||
+                Opcode == VPInstruction::ExtractLastLanePerPart ||
+                Opcode == VPInstruction::ExtractPenultimateElement)
+              return true;
+          }
+
+          return U->usesScalars(RepOrWidenR);
         }))
       continue;
@@ -4131,13 +4147,13 @@ VPlanTransforms::expandSCEVs(VPlan &Plan, ScalarEvolution &SE) {
 /// is defined at \p Idx of a load interleave group.
 static bool canNarrowLoad(VPWidenRecipe *WideMember0, unsigned OpIdx,
                           VPValue *OpV, unsigned Idx) {
-  auto *DefR = OpV->getDefiningRecipe();
-  if (!DefR)
-    return WideMember0->getOperand(OpIdx) == OpV;
-  if (auto *W = dyn_cast<VPWidenLoadRecipe>(DefR))
-    return !W->getMask() && WideMember0->getOperand(OpIdx) == OpV;
-
-  if (auto *IR = dyn_cast<VPInterleaveRecipe>(DefR))
+  VPValue *Member0Op = WideMember0->getOperand(OpIdx);
+  VPRecipeBase *Member0OpR = Member0Op->getDefiningRecipe();
+  if (!Member0OpR)
+    return Member0Op == OpV;
+  if (auto *W = dyn_cast<VPWidenLoadRecipe>(Member0OpR))
+    return !W->getMask() && Member0Op == OpV;
+  if (auto *IR = dyn_cast<VPInterleaveRecipe>(Member0OpR))
     return IR->getInterleaveGroup()->isFull() && IR->getVPValue(Idx) == OpV;
   return false;
 }
