-rw-r--r--  llvm/include/llvm/Analysis/ReleaseModeModelRunner.h |  96
-rw-r--r--  llvm/lib/Analysis/MLInlineAdvisor.cpp               |   6
-rw-r--r--  llvm/unittests/Analysis/MLModelRunnerTest.cpp       | 137
3 files changed, 212 insertions(+), 27 deletions(-)
diff --git a/llvm/include/llvm/Analysis/ReleaseModeModelRunner.h b/llvm/include/llvm/Analysis/ReleaseModeModelRunner.h
index 9185513..9fb4ff4 100644
--- a/llvm/include/llvm/Analysis/ReleaseModeModelRunner.h
+++ b/llvm/include/llvm/Analysis/ReleaseModeModelRunner.h
@@ -14,17 +14,43 @@
 #ifndef LLVM_ANALYSIS_RELEASEMODEMODELRUNNER_H
 #define LLVM_ANALYSIS_RELEASEMODEMODELRUNNER_H
 
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/MLModelRunner.h"
 #include "llvm/Analysis/TensorSpec.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MD5.h"
 
 #include <memory>
-#include <vector>
 
 namespace llvm {
 
 /// ReleaseModeModelRunner - production mode implementation of the
 /// MLModelRunner. It uses an AOT-compiled SavedModel for efficient execution.
+struct EmbeddedModelRunnerOptions {
+  /// Feed and Fetch feature prefixes - i.e. a feature named "foo" will be
+  /// looked up as {FeedPrefix}foo, and the output named "bar" will be looked
+  /// up as {FetchPrefix}bar.
+  StringRef FeedPrefix = "feed_";
+  StringRef FetchPrefix = "fetch_";
+
+  /// ModelSelector is the name (recognized by the AOT-ed model) of a sub-model
+  /// to use. "" is allowed if the model doesn't support sub-models.
+  StringRef ModelSelector = "";
+
+  EmbeddedModelRunnerOptions &setFeedPrefix(StringRef Value) {
+    FeedPrefix = Value;
+    return *this;
+  }
+  EmbeddedModelRunnerOptions &setFetchPrefix(StringRef Value) {
+    FetchPrefix = Value;
+    return *this;
+  }
+  EmbeddedModelRunnerOptions &setModelSelector(StringRef Value) {
+    ModelSelector = Value;
+    return *this;
+  }
+};
+
 template <class TGen>
 class ReleaseModeModelRunner final : public MLModelRunner {
 public:
@@ -32,22 +58,50 @@ public:
   /// std::array or std::vector, that has a size() method.
   template <class FType>
   ReleaseModeModelRunner(LLVMContext &Ctx, const FType &InputSpec,
-                         StringRef DecisionName, StringRef FeedPrefix = "feed_",
-                         StringRef FetchPrefix = "fetch_")
-      : MLModelRunner(Ctx, MLModelRunner::Kind::Release, InputSpec.size()),
+                         StringRef DecisionName,
+                         const EmbeddedModelRunnerOptions &Options = {})
+      : MLModelRunner(Ctx, MLModelRunner::Kind::Release, InputSpec.size() + 1),
        CompiledModel(std::make_unique<TGen>()) {
     assert(CompiledModel && "The CompiledModel should be valid");
-
-    for (size_t I = 0; I < InputSpec.size(); ++I) {
-      const int Index =
-          CompiledModel->LookupArgIndex(FeedPrefix.str() + InputSpec[I].name());
-      void *Buffer = nullptr;
-      if (Index >= 0)
-        Buffer = CompiledModel->arg_data(Index);
-      setUpBufferForTensor(I, InputSpec[I], Buffer);
+    // Set up the model_selector past all the InputSpecs in all cases.
+    //   - if the model doesn't have such a feature, but the user requested it,
+    //     we report an error. Same if the model supports it but the user
+    //     didn't specify it.
+    //   - finally, we compute the MD5 hash of the user input and set the value
+    //     of the model selector to {high, low}.
+    bool InputIsPresent = true;
+    populateTensor(InputSpec.size(),
+                   TensorSpec::createSpec<uint64_t>("_model_selector", {2}),
+                   Options.FeedPrefix, InputIsPresent);
+
+    // If we hit the "report an error" cases outlined above, continue with the
+    // setup in case there's some custom diagnostics handler installed and it
+    // doesn't promptly exit.
+    if (Options.ModelSelector.empty() && InputIsPresent)
+      Ctx.emitError(
+          "A model selector was not specified but the underlying model "
+          "requires selecting one because it exposes a _model_selector input");
+    uint64_t High = 0;
+    uint64_t Low = 0;
+    if (!Options.ModelSelector.empty()) {
+      if (!InputIsPresent)
+        Ctx.emitError("A model selector was specified but the underlying model "
+                      "does not expose a _model_selector input");
+      const auto Hash = MD5::hash(arrayRefFromStringRef(Options.ModelSelector));
+      High = Hash.high();
+      Low = Hash.low();
     }
-
-    ResultIndex = CompiledModel->LookupResultIndex(FetchPrefix.str() +
+    getTensor<uint64_t>(InputSpec.size())[0] = High;
+    getTensor<uint64_t>(InputSpec.size())[1] = Low;
+    // At this point, the model selector is set up. If the user didn't provide
+    // one, but the model has a _model_selector, it'll be set to (0, 0), which
+    // the composite model should treat as an error as part of its
+    // implementation (but that should only matter if there is a custom handler
+    // that doesn't exit on error).
+    for (size_t I = 0; I < InputSpec.size(); ++I)
+      populateTensor(I, InputSpec[I], Options.FeedPrefix, InputIsPresent);
+
+    ResultIndex = CompiledModel->LookupResultIndex(Options.FetchPrefix.str() +
                                                    DecisionName.str());
     assert(ResultIndex >= 0 && "Cannot find DecisionName in inlining model");
   }
@@ -59,6 +113,20 @@ public:
   }
 
 private:
+  // Fetch the model-provided buffer for the given Spec, or let MLModelRunner
+  // create a scratch buffer. Indicate back to the caller if the model had that
+  // input in the first place.
+  void populateTensor(size_t Pos, const TensorSpec &Spec, StringRef Prefix,
+                      bool &InputIsPresent) {
+    const int Index =
+        CompiledModel->LookupArgIndex((Prefix + Spec.name()).str());
+    void *Buffer = nullptr;
+    InputIsPresent = Index >= 0;
+    if (InputIsPresent)
+      Buffer = CompiledModel->arg_data(Index);
+    setUpBufferForTensor(Pos, Spec, Buffer);
+  }
+
   void *evaluateUntyped() override {
     CompiledModel->Run();
     return CompiledModel->result_data(ResultIndex);
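For context on how a client drives the API above, here is a standalone usage sketch. It is not part of the patch: DemoAOTModel is a hypothetical stand-in for an AOT-generated evaluator exposing the LookupArgIndex/arg_data/Run/result_data interface the runner expects. Because it has no _model_selector input, default options (no selector) are the valid configuration.

// Standalone sketch (not part of this patch). DemoAOTModel is hypothetical;
// real users get the evaluator class generated by the AOT compiler.
#include "llvm/Analysis/ReleaseModeModelRunner.h"
#include "llvm/Analysis/TensorSpec.h"
#include "llvm/IR/LLVMContext.h"
#include <string>
#include <vector>

using namespace llvm;

struct DemoAOTModel {
  int64_t A = 0;
  int64_t R = 0;
  // The runner looks features up as {FeedPrefix}{name}; "feed_" is the default.
  int LookupArgIndex(const std::string &Name) {
    return Name == "feed_a" ? 0 : -1;
  }
  int LookupResultIndex(const std::string &) { return 0; }
  void *arg_data(int Index) {
    if (Index == 0)
      return &A;
    return nullptr;
  }
  void *result_data(int) { return &R; }
  void Run() { R = 2 * A; }
};

int main() {
  LLVMContext Ctx;
  std::vector<TensorSpec> Features{TensorSpec::createSpec<int64_t>("a", {1})};
  // No ModelSelector set: fine here, since DemoAOTModel has no _model_selector
  // input. Setting one would emitError, per the constructor logic above.
  ReleaseModeModelRunner<DemoAOTModel> Runner(Ctx, Features, "decision");
  *Runner.getTensor<int64_t>(0) = 21; // writes straight into the model's buffer
  return Runner.evaluate<int64_t>() == 42 ? 0 : 1;
}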
diff --git a/llvm/lib/Analysis/MLInlineAdvisor.cpp b/llvm/lib/Analysis/MLInlineAdvisor.cpp
index 934c0cf..14eadf6 100644
--- a/llvm/lib/Analysis/MLInlineAdvisor.cpp
+++ b/llvm/lib/Analysis/MLInlineAdvisor.cpp
@@ -56,6 +56,9 @@ static cl::opt<SkipMLPolicyCriteria> SkipPolicy(
         clEnumValN(SkipMLPolicyCriteria::IfCallerIsNotCold, "if-caller-not-cold",
                    "if the caller is not cold")));
 
+static cl::opt<std::string> ModelSelector("ml-inliner-model-selector",
+                                          cl::Hidden, cl::init(""));
+
 #if defined(LLVM_HAVE_TF_AOT_INLINERSIZEMODEL)
 // codegen-ed file
 #include "InlinerSizeModel.h" // NOLINT
@@ -73,7 +76,8 @@ llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM,
   std::unique_ptr<MLModelRunner> AOTRunner;
   if (InteractiveChannelBaseName.empty())
     AOTRunner = std::make_unique<ReleaseModeModelRunner<CompiledModelType>>(
-        M.getContext(), FeatureMap, DecisionName);
+        M.getContext(), FeatureMap, DecisionName,
+        EmbeddedModelRunnerOptions().setModelSelector(ModelSelector));
   else {
     auto Features = FeatureMap;
     if (InteractiveIncludeDefault)
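A note on the new ml-inliner-model-selector flag: the string is never compared literally by the compiled model. Per the header change, the runner hashes it and feeds the MD5 digest, split into {high, low} 64-bit words, through the _model_selector input. A small standalone sketch, not part of the patch, that prints the words a given selector produces (handy when wiring up a composite model's lookup table):

// Standalone sketch (not part of this patch): prints the two 64-bit words a
// selector string contributes to the _model_selector tensor, mirroring how
// ReleaseModeModelRunner derives them.
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/raw_ostream.h"

int main(int argc, char **argv) {
  llvm::StringRef Selector = argc > 1 ? argv[1] : "some-submodel"; // example input
  const llvm::MD5::MD5Result Hash =
      llvm::MD5::hash(llvm::arrayRefFromStringRef(Selector));
  // The runner writes Hash.high() to element 0 and Hash.low() to element 1.
  llvm::outs() << Selector << " -> {" << Hash.high() << ", " << Hash.low()
               << "}\n";
}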
#include "llvm/Analysis/NoInferenceModelRunner.h" #include "llvm/Analysis/ReleaseModeModelRunner.h" #include "llvm/Support/BinaryByteStream.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/FileUtilities.h" #include "llvm/Support/JSON.h" @@ -28,14 +30,17 @@ namespace llvm { // This is a mock of the kind of AOT-generated model evaluator. It has 2 tensors // of shape {1}, and 'evaluation' adds them. // The interface is the one expected by ReleaseModelRunner. -class MockAOTModel final { +class MockAOTModelBase { +protected: int64_t A = 0; int64_t B = 0; int64_t R = 0; public: - MockAOTModel() = default; - int LookupArgIndex(const std::string &Name) { + MockAOTModelBase() = default; + virtual ~MockAOTModelBase() = default; + + virtual int LookupArgIndex(const std::string &Name) { if (Name == "prefix_a") return 0; if (Name == "prefix_b") @@ -43,13 +48,13 @@ public: return -1; } int LookupResultIndex(const std::string &) { return 0; } - void Run() { R = A + B; } - void *result_data(int RIndex) { + virtual void Run() = 0; + virtual void *result_data(int RIndex) { if (RIndex == 0) return &R; return nullptr; } - void *arg_data(int Index) { + virtual void *arg_data(int Index) { switch (Index) { case 0: return &A; @@ -60,6 +65,64 @@ public: } } }; + +class AdditionAOTModel final : public MockAOTModelBase { +public: + AdditionAOTModel() = default; + void Run() override { R = A + B; } +}; + +class DiffAOTModel final : public MockAOTModelBase { +public: + DiffAOTModel() = default; + void Run() override { R = A - B; } +}; + +static const char *M1Selector = "the model that subtracts"; +static const char *M2Selector = "the model that adds"; + +static MD5::MD5Result Hash1 = MD5::hash(arrayRefFromStringRef(M1Selector)); +static MD5::MD5Result Hash2 = MD5::hash(arrayRefFromStringRef(M2Selector)); +class ComposedAOTModel final { + DiffAOTModel M1; + AdditionAOTModel M2; + uint64_t Selector[2] = {0}; + + bool isHashSameAsSelector(const std::pair<uint64_t, uint64_t> &Words) const { + return Selector[0] == Words.first && Selector[1] == Words.second; + } + MockAOTModelBase *getModel() { + if (isHashSameAsSelector(Hash1.words())) + return &M1; + if (isHashSameAsSelector(Hash2.words())) + return &M2; + llvm_unreachable("Should be one of the two"); + } + +public: + ComposedAOTModel() = default; + int LookupArgIndex(const std::string &Name) { + if (Name == "prefix__model_selector") + return 2; + return getModel()->LookupArgIndex(Name); + } + int LookupResultIndex(const std::string &Name) { + return getModel()->LookupResultIndex(Name); + } + void *arg_data(int Index) { + if (Index == 2) + return Selector; + return getModel()->arg_data(Index); + } + void *result_data(int RIndex) { return getModel()->result_data(RIndex); } + void Run() { getModel()->Run(); } +}; + +static EmbeddedModelRunnerOptions makeOptions() { + EmbeddedModelRunnerOptions Opts; + Opts.setFeedPrefix("prefix_"); + return Opts; +} } // namespace llvm TEST(NoInferenceModelRunner, AccessTensors) { @@ -86,8 +149,8 @@ TEST(ReleaseModeRunner, NormalUse) { LLVMContext Ctx; std::vector<TensorSpec> Inputs{TensorSpec::createSpec<int64_t>("a", {1}), TensorSpec::createSpec<int64_t>("b", {1})}; - auto Evaluator = std::make_unique<ReleaseModeModelRunner<MockAOTModel>>( - Ctx, Inputs, "", "prefix_"); + auto Evaluator = std::make_unique<ReleaseModeModelRunner<AdditionAOTModel>>( + Ctx, Inputs, "", makeOptions()); *Evaluator->getTensor<int64_t>(0) = 1; *Evaluator->getTensor<int64_t>(1) = 2; 
   EXPECT_EQ(Evaluator->evaluate<int64_t>(), 3);
@@ -100,8 +163,8 @@ TEST(ReleaseModeRunner, ExtraFeatures) {
   std::vector<TensorSpec> Inputs{TensorSpec::createSpec<int64_t>("a", {1}),
                                  TensorSpec::createSpec<int64_t>("b", {1}),
                                  TensorSpec::createSpec<int64_t>("c", {1})};
-  auto Evaluator = std::make_unique<ReleaseModeModelRunner<MockAOTModel>>(
-      Ctx, Inputs, "", "prefix_");
+  auto Evaluator = std::make_unique<ReleaseModeModelRunner<AdditionAOTModel>>(
+      Ctx, Inputs, "", makeOptions());
   *Evaluator->getTensor<int64_t>(0) = 1;
   *Evaluator->getTensor<int64_t>(1) = 2;
   *Evaluator->getTensor<int64_t>(2) = -3;
@@ -118,8 +181,8 @@ TEST(ReleaseModeRunner, ExtraFeaturesOutOfOrder) {
       TensorSpec::createSpec<int64_t>("c", {1}),
       TensorSpec::createSpec<int64_t>("b", {1}),
   };
-  auto Evaluator = std::make_unique<ReleaseModeModelRunner<MockAOTModel>>(
-      Ctx, Inputs, "", "prefix_");
+  auto Evaluator = std::make_unique<ReleaseModeModelRunner<AdditionAOTModel>>(
+      Ctx, Inputs, "", makeOptions());
   *Evaluator->getTensor<int64_t>(0) = 1;  // a
   *Evaluator->getTensor<int64_t>(1) = 2;  // c
   *Evaluator->getTensor<int64_t>(2) = -3; // b
@@ -129,6 +192,56 @@ TEST(ReleaseModeRunner, ExtraFeaturesOutOfOrder) {
   EXPECT_EQ(*Evaluator->getTensor<int64_t>(2), -3);
 }
 
+// We expect an error to be reported early if the user tried to specify a model
+// selector, but the model in fact doesn't support one.
+TEST(ReleaseModelRunner, ModelSelectorNoInputFeaturePresent) {
+  LLVMContext Ctx;
+  std::vector<TensorSpec> Inputs{TensorSpec::createSpec<int64_t>("a", {1}),
+                                 TensorSpec::createSpec<int64_t>("b", {1})};
+  EXPECT_DEATH(std::make_unique<ReleaseModeModelRunner<AdditionAOTModel>>(
+                   Ctx, Inputs, "", makeOptions().setModelSelector(M2Selector)),
+               "A model selector was specified but the underlying model does "
+               "not expose a _model_selector input");
+}
+
+TEST(ReleaseModelRunner, ModelSelectorNoSelectorGiven) {
+  LLVMContext Ctx;
+  std::vector<TensorSpec> Inputs{TensorSpec::createSpec<int64_t>("a", {1}),
+                                 TensorSpec::createSpec<int64_t>("b", {1})};
+  EXPECT_DEATH(
+      std::make_unique<ReleaseModeModelRunner<ComposedAOTModel>>(
+          Ctx, Inputs, "", makeOptions()),
+      "A model selector was not specified but the underlying model requires "
+      "selecting one because it exposes a _model_selector input");
+}
+
+// Test that we correctly set up the _model_selector tensor value. We are only
+// responsible for what happens if the user doesn't specify a value (but the
+// model supports the feature), and for populating the tensor correctly when a
+// selector is given - upfront, in case the model implementation needs it for
+// subsequent tensor buffer lookups.
+TEST(ReleaseModelRunner, ModelSelector) {
+  LLVMContext Ctx;
+  std::vector<TensorSpec> Inputs{TensorSpec::createSpec<int64_t>("a", {1}),
+                                 TensorSpec::createSpec<int64_t>("b", {1})};
+  // This explicitly asks for M1.
+  auto Evaluator = std::make_unique<ReleaseModeModelRunner<ComposedAOTModel>>(
+      Ctx, Inputs, "", makeOptions().setModelSelector(M1Selector));
+  *Evaluator->getTensor<int64_t>(0) = 1;
+  *Evaluator->getTensor<int64_t>(1) = 2;
+  EXPECT_EQ(Evaluator->evaluate<int64_t>(), -1);
+
+  // Ask for M2.
+  Evaluator = std::make_unique<ReleaseModeModelRunner<ComposedAOTModel>>(
+      Ctx, Inputs, "", makeOptions().setModelSelector(M2Selector));
+  *Evaluator->getTensor<int64_t>(0) = 1;
+  *Evaluator->getTensor<int64_t>(1) = 2;
+  EXPECT_EQ(Evaluator->evaluate<int64_t>(), 3);
+
+  // Asking for a model that's not supported isn't handled by our infra; we
+  // expect the model implementation to fail at some point.
+}
+
 #if defined(LLVM_ON_UNIX)
 TEST(InteractiveModelRunner, Evaluation) {
   LLVMContext Ctx;
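The closing comment in ModelSelector could be pinned down with one more death test. This is a hypothetical addition, not part of the patch, and it assumes an assertions-enabled build so llvm_unreachable reports its message: a selector matching neither hash reaches the llvm_unreachable in ComposedAOTModel::getModel() while the runner constructor looks up the first regular feature, because the _model_selector tensor is populated upfront.

// Hypothetical follow-on test (not in this patch): an unrecognized selector
// dies inside the mock during runner construction, since the selector tensor
// is written before the remaining feature buffers are looked up.
TEST(ReleaseModelRunner, ModelSelectorUnknown) {
  LLVMContext Ctx;
  std::vector<TensorSpec> Inputs{TensorSpec::createSpec<int64_t>("a", {1}),
                                 TensorSpec::createSpec<int64_t>("b", {1})};
  EXPECT_DEATH(std::make_unique<ReleaseModeModelRunner<ComposedAOTModel>>(
                   Ctx, Inputs, "", makeOptions().setModelSelector("unknown")),
               "Should be one of the two");
}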