-rw-r--r--   llvm/include/llvm/Analysis/ReleaseModeModelRunner.h    96
-rw-r--r--   llvm/lib/Analysis/MLInlineAdvisor.cpp                    6
-rw-r--r--   llvm/unittests/Analysis/MLModelRunnerTest.cpp          137
3 files changed, 212 insertions(+), 27 deletions(-)
diff --git a/llvm/include/llvm/Analysis/ReleaseModeModelRunner.h b/llvm/include/llvm/Analysis/ReleaseModeModelRunner.h
index 9185513..9fb4ff4 100644
--- a/llvm/include/llvm/Analysis/ReleaseModeModelRunner.h
+++ b/llvm/include/llvm/Analysis/ReleaseModeModelRunner.h
@@ -14,17 +14,43 @@
#ifndef LLVM_ANALYSIS_RELEASEMODEMODELRUNNER_H
#define LLVM_ANALYSIS_RELEASEMODEMODELRUNNER_H
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/MLModelRunner.h"
#include "llvm/Analysis/TensorSpec.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MD5.h"
#include <memory>
-#include <vector>
namespace llvm {
/// ReleaseModeModelRunner - production mode implementation of the
/// MLModelRunner. It uses an AOT-compiled SavedModel for efficient execution.
+struct EmbeddedModelRunnerOptions {
+ /// Feed and Fetch feature prefixes - i.e. a feature named "foo" will be
+ /// looked up as {FeedPrefix}foo; and the output named "bar" will be looked
+ /// up as {FetchPrefix}bar
+ StringRef FeedPrefix = "feed_";
+ StringRef FetchPrefix = "fetch_";
+
+ /// ModelSelector is the name (recognized by the AOT-ed model) of a sub-model
+ /// to use. "" is allowed if the model doesn't support sub-models.
+ StringRef ModelSelector = "";
+
+ EmbeddedModelRunnerOptions &setFeedPrefix(StringRef Value) {
+ FeedPrefix = Value;
+ return *this;
+ }
+ EmbeddedModelRunnerOptions &setFetchPrefix(StringRef Value) {
+ FetchPrefix = Value;
+ return *this;
+ }
+ EmbeddedModelRunnerOptions &setModelSelector(StringRef Value) {
+ ModelSelector = Value;
+ return *this;
+ }
+};
+
template <class TGen>
class ReleaseModeModelRunner final : public MLModelRunner {
public:
@@ -32,22 +58,50 @@ public:
/// std::array or std::vector, that has a size() method.
template <class FType>
ReleaseModeModelRunner(LLVMContext &Ctx, const FType &InputSpec,
- StringRef DecisionName, StringRef FeedPrefix = "feed_",
- StringRef FetchPrefix = "fetch_")
- : MLModelRunner(Ctx, MLModelRunner::Kind::Release, InputSpec.size()),
+ StringRef DecisionName,
+ const EmbeddedModelRunnerOptions &Options = {})
+ : MLModelRunner(Ctx, MLModelRunner::Kind::Release, InputSpec.size() + 1),
CompiledModel(std::make_unique<TGen>()) {
assert(CompiledModel && "The CompiledModel should be valid");
-
- for (size_t I = 0; I < InputSpec.size(); ++I) {
- const int Index =
- CompiledModel->LookupArgIndex(FeedPrefix.str() + InputSpec[I].name());
- void *Buffer = nullptr;
- if (Index >= 0)
- Buffer = CompiledModel->arg_data(Index);
- setUpBufferForTensor(I, InputSpec[I], Buffer);
+ // Set up the model_selector tensor past all the InputSpecs, in all cases:
+ // - if the model doesn't have such a feature but the user requested it, we
+ // report an error. Same if the model supports it but the user didn't
+ // specify one.
+ // - finally, we compute the MD5 hash of the user input and set the value
+ // of the model selector to {high, low}.
+ bool InputIsPresent = true;
+ populateTensor(InputSpec.size(),
+ TensorSpec::createSpec<uint64_t>("_model_selector", {2}),
+ Options.FeedPrefix, InputIsPresent);
+
+ // If we hit the "report an error" cases outlined above, continue with the
+ // setup in case there's a custom diagnostics handler installed that doesn't
+ // promptly exit.
+ if (Options.ModelSelector.empty() && InputIsPresent)
+ Ctx.emitError(
+ "A model selector was not specified but the underlying model "
+ "requires selecting one because it exposes a _model_selector input");
+ uint64_t High = 0;
+ uint64_t Low = 0;
+ if (!Options.ModelSelector.empty()) {
+ if (!InputIsPresent)
+ Ctx.emitError("A model selector was specified but the underlying model "
+ "does not expose a _model_selector input");
+ const auto Hash = MD5::hash(arrayRefFromStringRef(Options.ModelSelector));
+ High = Hash.high();
+ Low = Hash.low();
}
-
- ResultIndex = CompiledModel->LookupResultIndex(FetchPrefix.str() +
+ getTensor<uint64_t>(InputSpec.size())[0] = High;
+ getTensor<uint64_t>(InputSpec.size())[1] = Low;
+ // At this point, the model selector is set up. If the user didn't provide
+ // one but the model has a _model_selector input, it'll be set to (0, 0),
+ // which the composite model should treat as an error as part of its
+ // implementation (but that should only matter if there is a custom handler
+ // that doesn't exit on error).
+ for (size_t I = 0; I < InputSpec.size(); ++I)
+ populateTensor(I, InputSpec[I], Options.FeedPrefix, InputIsPresent);
+
+ ResultIndex = CompiledModel->LookupResultIndex(Options.FetchPrefix.str() +
DecisionName.str());
assert(ResultIndex >= 0 && "Cannot find DecisionName in inlining model");
}
@@ -59,6 +113,20 @@ public:
}
private:
+ // Fetch the model-provided buffer for the given Spec, or let MLModelRunner
+ // create a scratch buffer. Indicate back to the caller whether the model had
+ // that input in the first place.
+ void populateTensor(size_t Pos, const TensorSpec &Spec, StringRef Prefix,
+ bool &InputIsPresent) {
+ const int Index =
+ CompiledModel->LookupArgIndex((Prefix + Spec.name()).str());
+ void *Buffer = nullptr;
+ InputIsPresent = Index >= 0;
+ if (InputIsPresent)
+ Buffer = CompiledModel->arg_data(Index);
+ setUpBufferForTensor(Pos, Spec, Buffer);
+ }
+
void *evaluateUntyped() override {
CompiledModel->Run();
return CompiledModel->result_data(ResultIndex);
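For reference, here is a minimal, self-contained usage sketch of the new constructor signature (not part of this patch). StubModel, "decision", and "sub-model-x" are illustrative stand-ins for a real AOT-compiled model class and its names; the stub exposes the extra _model_selector input so construction with a selector succeeds:

// Sketch only: StubModel mimics the interface ReleaseModeModelRunner expects
// from the AOT-generated class, including the extra _model_selector input.
#include "llvm/Analysis/ReleaseModeModelRunner.h"
#include "llvm/Analysis/TensorSpec.h"
#include "llvm/IR/LLVMContext.h"
#include <string>
#include <vector>

namespace {
struct StubModel {
  int64_t A = 0;
  int64_t R = 0;
  uint64_t Selector[2] = {0, 0};
  int LookupArgIndex(const std::string &Name) {
    if (Name == "feed_a")
      return 0;
    if (Name == "feed__model_selector") // default FeedPrefix + "_model_selector"
      return 1;
    return -1;
  }
  int LookupResultIndex(const std::string &) { return 0; }
  void *arg_data(int Index) {
    return Index == 0 ? static_cast<void *>(&A) : static_cast<void *>(Selector);
  }
  void *result_data(int) { return &R; }
  void Run() { R = A; }
};
} // namespace

void exampleUse(llvm::LLVMContext &Ctx) {
  std::vector<llvm::TensorSpec> Inputs{
      llvm::TensorSpec::createSpec<int64_t>("a", {1})};
  // The selector string is MD5-hashed and written as {high, low} into the
  // 2-element _model_selector tensor before the regular inputs are wired up.
  llvm::ReleaseModeModelRunner<StubModel> Runner(
      Ctx, Inputs, "decision",
      llvm::EmbeddedModelRunnerOptions().setModelSelector("sub-model-x"));
  *Runner.getTensor<int64_t>(0) = 42;
  (void)Runner.evaluate<int64_t>();
}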
diff --git a/llvm/lib/Analysis/MLInlineAdvisor.cpp b/llvm/lib/Analysis/MLInlineAdvisor.cpp
index 934c0cf..14eadf6 100644
--- a/llvm/lib/Analysis/MLInlineAdvisor.cpp
+++ b/llvm/lib/Analysis/MLInlineAdvisor.cpp
@@ -56,6 +56,9 @@ static cl::opt<SkipMLPolicyCriteria> SkipPolicy(
clEnumValN(SkipMLPolicyCriteria::IfCallerIsNotCold,
"if-caller-not-cold", "if the caller is not cold")));
+static cl::opt<std::string> ModelSelector("ml-inliner-model-selector",
+ cl::Hidden, cl::init(""));
+
#if defined(LLVM_HAVE_TF_AOT_INLINERSIZEMODEL)
// codegen-ed file
#include "InlinerSizeModel.h" // NOLINT
@@ -73,7 +76,8 @@ llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM,
std::unique_ptr<MLModelRunner> AOTRunner;
if (InteractiveChannelBaseName.empty())
AOTRunner = std::make_unique<ReleaseModeModelRunner<CompiledModelType>>(
- M.getContext(), FeatureMap, DecisionName);
+ M.getContext(), FeatureMap, DecisionName,
+ EmbeddedModelRunnerOptions().setModelSelector(ModelSelector));
else {
auto Features = FeatureMap;
if (InteractiveIncludeDefault)
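The string passed via -ml-inliner-model-selector is opaque to LLVM; ReleaseModeModelRunner only hashes it, and the composite AOT model decides which sub-model the resulting words name. A small sketch (an illustrative helper, not part of this patch) of how a model embedder could precompute the expected words for each sub-model name it recognizes:

// Sketch: compute the {high, low} words that ReleaseModeModelRunner places in
// the _model_selector tensor for a given selector string. A composite model
// compares the tensor contents against the words of each sub-model name it
// recognizes (the unit test below does exactly this with Hash1/Hash2).
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MD5.h"
#include <cstdint>

static void selectorWords(llvm::StringRef Selector, uint64_t Words[2]) {
  const llvm::MD5::MD5Result Hash =
      llvm::MD5::hash(llvm::arrayRefFromStringRef(Selector));
  Words[0] = Hash.high(); // element 0 of the _model_selector tensor
  Words[1] = Hash.low();  // element 1
}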
diff --git a/llvm/unittests/Analysis/MLModelRunnerTest.cpp b/llvm/unittests/Analysis/MLModelRunnerTest.cpp
index 007a8cf..53ad2fc 100644
--- a/llvm/unittests/Analysis/MLModelRunnerTest.cpp
+++ b/llvm/unittests/Analysis/MLModelRunnerTest.cpp
@@ -7,10 +7,12 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/MLModelRunner.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/InteractiveModelRunner.h"
#include "llvm/Analysis/NoInferenceModelRunner.h"
#include "llvm/Analysis/ReleaseModeModelRunner.h"
#include "llvm/Support/BinaryByteStream.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/JSON.h"
@@ -28,14 +30,17 @@ namespace llvm {
// This is a mock of the kind of AOT-generated model evaluator. It has 2 tensors
// of shape {1}, and 'evaluation' adds them.
// The interface is the one expected by ReleaseModeModelRunner.
-class MockAOTModel final {
+class MockAOTModelBase {
+protected:
int64_t A = 0;
int64_t B = 0;
int64_t R = 0;
public:
- MockAOTModel() = default;
- int LookupArgIndex(const std::string &Name) {
+ MockAOTModelBase() = default;
+ virtual ~MockAOTModelBase() = default;
+
+ virtual int LookupArgIndex(const std::string &Name) {
if (Name == "prefix_a")
return 0;
if (Name == "prefix_b")
@@ -43,13 +48,13 @@ public:
return -1;
}
int LookupResultIndex(const std::string &) { return 0; }
- void Run() { R = A + B; }
- void *result_data(int RIndex) {
+ virtual void Run() = 0;
+ virtual void *result_data(int RIndex) {
if (RIndex == 0)
return &R;
return nullptr;
}
- void *arg_data(int Index) {
+ virtual void *arg_data(int Index) {
switch (Index) {
case 0:
return &A;
@@ -60,6 +65,64 @@ public:
}
}
};
+
+class AdditionAOTModel final : public MockAOTModelBase {
+public:
+ AdditionAOTModel() = default;
+ void Run() override { R = A + B; }
+};
+
+class DiffAOTModel final : public MockAOTModelBase {
+public:
+ DiffAOTModel() = default;
+ void Run() override { R = A - B; }
+};
+
+static const char *M1Selector = "the model that subtracts";
+static const char *M2Selector = "the model that adds";
+
+static MD5::MD5Result Hash1 = MD5::hash(arrayRefFromStringRef(M1Selector));
+static MD5::MD5Result Hash2 = MD5::hash(arrayRefFromStringRef(M2Selector));
+class ComposedAOTModel final {
+ DiffAOTModel M1;
+ AdditionAOTModel M2;
+ uint64_t Selector[2] = {0};
+
+ bool isHashSameAsSelector(const std::pair<uint64_t, uint64_t> &Words) const {
+ return Selector[0] == Words.first && Selector[1] == Words.second;
+ }
+ MockAOTModelBase *getModel() {
+ if (isHashSameAsSelector(Hash1.words()))
+ return &M1;
+ if (isHashSameAsSelector(Hash2.words()))
+ return &M2;
+ llvm_unreachable("Should be one of the two");
+ }
+
+public:
+ ComposedAOTModel() = default;
+ int LookupArgIndex(const std::string &Name) {
+ if (Name == "prefix__model_selector")
+ return 2;
+ return getModel()->LookupArgIndex(Name);
+ }
+ int LookupResultIndex(const std::string &Name) {
+ return getModel()->LookupResultIndex(Name);
+ }
+ void *arg_data(int Index) {
+ if (Index == 2)
+ return Selector;
+ return getModel()->arg_data(Index);
+ }
+ void *result_data(int RIndex) { return getModel()->result_data(RIndex); }
+ void Run() { getModel()->Run(); }
+};
+
+static EmbeddedModelRunnerOptions makeOptions() {
+ EmbeddedModelRunnerOptions Opts;
+ Opts.setFeedPrefix("prefix_");
+ return Opts;
+}
} // namespace llvm
TEST(NoInferenceModelRunner, AccessTensors) {
@@ -86,8 +149,8 @@ TEST(ReleaseModeRunner, NormalUse) {
LLVMContext Ctx;
std::vector<TensorSpec> Inputs{TensorSpec::createSpec<int64_t>("a", {1}),
TensorSpec::createSpec<int64_t>("b", {1})};
- auto Evaluator = std::make_unique<ReleaseModeModelRunner<MockAOTModel>>(
- Ctx, Inputs, "", "prefix_");
+ auto Evaluator = std::make_unique<ReleaseModeModelRunner<AdditionAOTModel>>(
+ Ctx, Inputs, "", makeOptions());
*Evaluator->getTensor<int64_t>(0) = 1;
*Evaluator->getTensor<int64_t>(1) = 2;
EXPECT_EQ(Evaluator->evaluate<int64_t>(), 3);
@@ -100,8 +163,8 @@ TEST(ReleaseModeRunner, ExtraFeatures) {
std::vector<TensorSpec> Inputs{TensorSpec::createSpec<int64_t>("a", {1}),
TensorSpec::createSpec<int64_t>("b", {1}),
TensorSpec::createSpec<int64_t>("c", {1})};
- auto Evaluator = std::make_unique<ReleaseModeModelRunner<MockAOTModel>>(
- Ctx, Inputs, "", "prefix_");
+ auto Evaluator = std::make_unique<ReleaseModeModelRunner<AdditionAOTModel>>(
+ Ctx, Inputs, "", makeOptions());
*Evaluator->getTensor<int64_t>(0) = 1;
*Evaluator->getTensor<int64_t>(1) = 2;
*Evaluator->getTensor<int64_t>(2) = -3;
@@ -118,8 +181,8 @@ TEST(ReleaseModeRunner, ExtraFeaturesOutOfOrder) {
TensorSpec::createSpec<int64_t>("c", {1}),
TensorSpec::createSpec<int64_t>("b", {1}),
};
- auto Evaluator = std::make_unique<ReleaseModeModelRunner<MockAOTModel>>(
- Ctx, Inputs, "", "prefix_");
+ auto Evaluator = std::make_unique<ReleaseModeModelRunner<AdditionAOTModel>>(
+ Ctx, Inputs, "", makeOptions());
*Evaluator->getTensor<int64_t>(0) = 1; // a
*Evaluator->getTensor<int64_t>(1) = 2; // c
*Evaluator->getTensor<int64_t>(2) = -3; // b
@@ -129,6 +192,56 @@ TEST(ReleaseModeRunner, ExtraFeaturesOutOfOrder) {
EXPECT_EQ(*Evaluator->getTensor<int64_t>(2), -3);
}
+// We expect an error to be reported early if the user tried to specify a model
+// selector, but the model in fact doesn't support that.
+TEST(ReleaseModelRunner, ModelSelectorNoInputFeaturePresent) {
+ LLVMContext Ctx;
+ std::vector<TensorSpec> Inputs{TensorSpec::createSpec<int64_t>("a", {1}),
+ TensorSpec::createSpec<int64_t>("b", {1})};
+ EXPECT_DEATH(std::make_unique<ReleaseModeModelRunner<AdditionAOTModel>>(
+ Ctx, Inputs, "", makeOptions().setModelSelector(M2Selector)),
+ "A model selector was specified but the underlying model does "
+ "not expose a _model_selector input");
+}
+
+TEST(ReleaseModelRunner, ModelSelectorNoSelectorGiven) {
+ LLVMContext Ctx;
+ std::vector<TensorSpec> Inputs{TensorSpec::createSpec<int64_t>("a", {1}),
+ TensorSpec::createSpec<int64_t>("b", {1})};
+ EXPECT_DEATH(
+ std::make_unique<ReleaseModeModelRunner<ComposedAOTModel>>(
+ Ctx, Inputs, "", makeOptions()),
+ "A model selector was not specified but the underlying model requires "
+ "selecting one because it exposes a _model_selector input");
+}
+
+// Test that we correctly set up the _model_selector tensor value. We are only
+// responsible for what happens when the user doesn't specify a value (but the
+// model supports the feature) or when the user does specify one; in the
+// latter case we correctly populate the tensor, and do so upfront (in case
+// the model implementation needs that for subsequent tensor buffer lookups).
+TEST(ReleaseModelRunner, ModelSelector) {
+ LLVMContext Ctx;
+ std::vector<TensorSpec> Inputs{TensorSpec::createSpec<int64_t>("a", {1}),
+ TensorSpec::createSpec<int64_t>("b", {1})};
+ // This explicitly asks for M1
+ auto Evaluator = std::make_unique<ReleaseModeModelRunner<ComposedAOTModel>>(
+ Ctx, Inputs, "", makeOptions().setModelSelector(M1Selector));
+ *Evaluator->getTensor<int64_t>(0) = 1;
+ *Evaluator->getTensor<int64_t>(1) = 2;
+ EXPECT_EQ(Evaluator->evaluate<int64_t>(), -1);
+
+ // Ask for M2
+ Evaluator = std::make_unique<ReleaseModeModelRunner<ComposedAOTModel>>(
+ Ctx, Inputs, "", makeOptions().setModelSelector(M2Selector));
+ *Evaluator->getTensor<int64_t>(0) = 1;
+ *Evaluator->getTensor<int64_t>(1) = 2;
+ EXPECT_EQ(Evaluator->evaluate<int64_t>(), 3);
+
+ // Asking for a model that's not supported isn't handled by our infra; we
+ // expect the model implementation to fail at some point.
+}
+
#if defined(LLVM_ON_UNIX)
TEST(InteractiveModelRunner, Evaluation) {
LLVMContext Ctx;