[MLInliner] Factor out logging

Factored out the logging facility, to allow its reuse outside the
inliner.

Differential Revision: https://reviews.llvm.org/D88770
This commit is contained in:
Mircea Trofin 2020-10-02 20:28:49 -07:00
parent c3e07a0018
commit 36bb1fb1fe
5 changed files with 297 additions and 142 deletions

View File

@@ -100,6 +100,64 @@ private:
Optional<TensorSpec> getTensorSpecFromJSON(LLVMContext &Ctx,
const json::Value &Value);
/// Logging utility - given an ordered specification of features, and assuming
/// a scalar reward, allow logging feature values and rewards, and then print
/// as tf.train.SequenceExample text protobuf.
/// The assumption is that, for an event to be logged (i.e. a set of feature
/// values and a reward), the user calls the log* API for each feature exactly
/// once, providing the index matching the position in the feature spec list
/// provided at construction:
/// event 0:
/// logTensorValue(0, ...)
/// logTensorValue(1, ...)
/// ...
/// logReward(...)
/// event 1:
/// logTensorValue(0, ...)
/// logTensorValue(1, ...)
/// ...
/// logReward(...)
///
/// At the end, call print to generate the protobuf.
class Logger final {
public:
/// A tensor to be logged, plus an optional alternate name to print it
/// under; when LoggingName is None, the TensorSpec's own name is used.
struct LoggedFeatureSpec {
TensorSpec Spec;
Optional<std::string> LoggingName;
};
/// Construct a Logger. If IncludeReward is false, then logReward shouldn't
/// be called, and the reward feature won't be printed out.
Logger(const std::vector<LoggedFeatureSpec> &FeatureSpecs,
const TensorSpec &RewardSpec, bool IncludeReward)
// The bool converts to 0/1: one raw buffer per feature, plus a final
// one for the reward when reward logging is enabled.
: FeatureSpecs(FeatureSpecs), RewardSpec(RewardSpec),
RawLogData(FeatureSpecs.size() + IncludeReward),
IncludeReward(IncludeReward) {}
/// Log the reward for the current event. Only valid when IncludeReward
/// was true at construction; the reward lives in the last RawLogData slot.
template <typename T> void logReward(T Value) {
assert(IncludeReward);
logTensorValue(RawLogData.size() - 1, &Value);
}
/// Append Size values of type T for feature FeatureID, copied as raw
/// bytes onto the end of that feature's buffer.
template <typename T>
void logTensorValue(size_t FeatureID, const T *Value, size_t Size = 1) {
const char *Start = reinterpret_cast<const char *>(Value);
const char *End = Start + sizeof(T) * Size;
RawLogData[FeatureID].insert(RawLogData[FeatureID].end(), Start, End);
}
/// Print everything logged so far as tf.train.SequenceExample text
/// protobuf (defined out of line).
void print(raw_ostream &OS);
private:
std::vector<LoggedFeatureSpec> FeatureSpecs;
TensorSpec RewardSpec;
/// RawData has one entry per feature, plus one more for the reward.
/// Each feature's values are then stored in a vector, in succession.
/// This means the ith event is stored at [*][i]
std::vector<std::vector<char>> RawLogData;
const bool IncludeReward;
};
class TFModelEvaluator final {
public:
/// The result of a model evaluation. Handles the lifetime of the output

View File

@@ -74,11 +74,11 @@ namespace {
/// An InlineEvent, used by TrainingLogger.
struct InlineEvent {
/// What the default policy's decision would have been.
bool DefaultDecision = false;
int64_t DefaultDecision = 0;
/// What we advised. When training off the default policy, this is the same as
/// DefaultDecision.
bool AdvisedDecision = false;
int64_t AdvisedDecision = 0;
/// What actually happened. This would be 'false' in the case of an inline
/// error, even if AdvisedDecision were true, otherwise it agrees with
@@ -109,91 +109,16 @@ public:
void print();
private:
/// Write the values of one tensor as a list.
template <typename T>
void writeTensorValues(raw_fd_ostream &OutFile, const char *TensorData,
size_t ElemCount) const {
OutFile << "[";
const T *TypedData = reinterpret_cast<const T *>(TensorData);
for (size_t I = 0; I < ElemCount; ++I) {
if (I > 0)
OutFile << ", ";
OutFile << TypedData[I];
}
OutFile << "]";
}
/// Write a list of tensors as a sequence of TensorFlow FeatureList protobufs.
/// The tensors are assumed to be stored contiguously, in row-major format,
/// in the TensorData buffer. Each tensor has the shape given by Spec. The
/// feature name in the output is either the provided LoggingName, if
/// specified, otherwise it's the name of the tensor (as given by Spec).
template <typename T>
void
writeTensorsAsFeatureLists(raw_fd_ostream &OutFile, const TensorSpec &Spec,
const T *TensorData, size_t TensorCount,
Optional<StringRef> LoggingName = None) const {
writeRawTensorsAsFeatureLists(OutFile, Spec,
reinterpret_cast<const char *>(TensorData),
TensorCount, LoggingName);
}
/// Untyped implementation of the API above.
void
writeRawTensorsAsFeatureLists(raw_fd_ostream &OutFile, const TensorSpec &Spec,
const char *TensorData, size_t TensorCount,
Optional<StringRef> LoggingName = None) const {
const char *FieldName = "<invalid>";
std::function<void(const char *)> ValueWriter;
// The 'Feature' protobuf only has 3 possible fields: float_list,
// int64_list, or bytes_list, so we capture int32 values as int64. We don't
// support any other types.
if (Spec.isElementType<int64_t>()) {
FieldName = "int64_list";
ValueWriter = [&](const char *Data) {
writeTensorValues<int64_t>(OutFile, Data, Spec.getElementCount());
};
} else if (Spec.isElementType<int32_t>()) {
FieldName = "int64_list";
ValueWriter = [&](const char *Data) {
writeTensorValues<int32_t>(OutFile, Data, Spec.getElementCount());
};
} else if (Spec.isElementType<float>()) {
FieldName = "float_list";
ValueWriter = [&](const char *Data) {
writeTensorValues<float>(OutFile, Data, Spec.getElementCount());
};
} else
llvm_unreachable("Unsupported tensor type.");
OutFile << " feature_list: {\n";
OutFile << " key: "
<< "\"" << (LoggingName ? *LoggingName : Spec.name()) << "\" ";
OutFile << "value: {\n";
size_t TensorByteSize = Spec.getElementCount() * Spec.getElementByteSize();
for (const char *P = TensorData,
*E = TensorData + TensorByteSize * TensorCount;
P < E; P += TensorByteSize) {
OutFile << " feature: { " << FieldName << ": { value: ";
ValueWriter(P);
OutFile << " } }\n";
}
OutFile << " }\n";
OutFile << " }\n";
}
StringRef LogFileName;
const ModelUnderTrainingRunner *const MUTR;
std::vector<InlineFeatures> Features;
std::vector<int64_t> DefaultDecisions;
// We store all outputs as data blobs, but we always expect to have one, the
// first one, representing the decision. While we could track that separately,
// for uniformity, we store it, generically, here.
std::vector<std::vector<char>> Outputs;
std::unique_ptr<Logger> L;
std::vector<bool> Effects;
std::vector<int64_t> Rewards;
/// There's at least one output. We'll set this to a different value if MUTR
/// is available.
size_t OutputCount = 1;
/// Set these 2 clearly OOB, to make sure we set them later.
size_t DefaultDecisionPos = std::numeric_limits<size_t>::max();
size_t DecisionPos = std::numeric_limits<size_t>::max();
};
/// An extension of the MLInlineAdvisor for the 'development' mode, targeting
@@ -331,8 +256,8 @@ private:
TrainingLogger &Logger;
const Optional<size_t> CallerSizeEstimateBefore;
const Optional<size_t> CalleeSizeEstimateBefore;
const bool DefaultDecision;
const bool Mandatory;
const int64_t DefaultDecision;
const int64_t Mandatory;
};
/// A pseudo model runner. We use it to store feature values when collecting
@@ -402,69 +327,62 @@ private:
TrainingLogger::TrainingLogger(StringRef LogFileName,
const ModelUnderTrainingRunner *MUTR)
: LogFileName(LogFileName), MUTR(MUTR) {
for (size_t I = 0; I < NumberOfFeatures; ++I)
Features.push_back(InlineFeatures());
// The first output is the inlining decision.
auto OutputCount = MUTR ? MUTR->outputSpecs().size() : 1;
Outputs.assign(OutputCount, std::vector<char>());
if (MUTR)
OutputCount = MUTR->outputSpecs().size();
std::vector<Logger::LoggedFeatureSpec> FT;
for (size_t I = 0; I < NumberOfFeatures; ++I)
FT.push_back(
{TensorSpec::createSpec<int64_t>(FeatureNameMap.at(I), {1}), None});
for (size_t I = 1; I < OutputCount; ++I)
FT.push_back({MUTR->outputSpecs()[I], MUTR->outputNames()[I]});
DefaultDecisionPos = FT.size();
FT.push_back(
{TensorSpec::createSpec<int64_t>(DefaultDecisionName, {1}), None});
DecisionPos = FT.size();
FT.push_back({TensorSpec::createSpec<int64_t>(DecisionName, {1}), None});
L = std::make_unique<Logger>(
FT, TensorSpec::createSpec<int64_t>(RewardName, {1}),
InlineSizeEstimatorAnalysis::isEvaluatorRequested());
}
/// Log one inlining event.
void TrainingLogger::logInlineEvent(const InlineEvent &Event,
const MLModelRunner &ModelRunner) {
for (size_t I = 0; I < NumberOfFeatures; ++I)
Features[I].push_back(ModelRunner.getFeature(I));
size_t CurrentFeature = 0;
for (; CurrentFeature < NumberOfFeatures; ++CurrentFeature) {
int64_t F = ModelRunner.getFeature(CurrentFeature);
L->logTensorValue(CurrentFeature, &F);
}
Effects.push_back(Event.Effect);
Rewards.push_back(Event.Reward);
DefaultDecisions.push_back(Event.DefaultDecision);
int64_t Advice = static_cast<int64_t>(Event.AdvisedDecision);
const char *AdviceData = reinterpret_cast<const char *>(&Advice);
Outputs[0].insert(Outputs[0].end(), AdviceData, AdviceData + sizeof(int64_t));
for (size_t I = 1; I < Outputs.size(); ++I) {
for (size_t I = 1; I < OutputCount; ++I) {
const auto &Result = *MUTR->lastEvaluationResult();
auto &Spec = MUTR->outputSpecs()[I];
const char *RawData =
reinterpret_cast<const char *>(Result.getUntypedTensorValue(I));
Outputs[I].insert(Outputs[I].end(), RawData,
RawData +
Spec.getElementCount() * Spec.getElementByteSize());
L->logTensorValue(CurrentFeature, RawData,
Spec.getElementCount() * Spec.getElementByteSize());
++CurrentFeature;
}
assert(CurrentFeature == DefaultDecisionPos);
L->logTensorValue(DefaultDecisionPos, &Event.DefaultDecision);
L->logTensorValue(DecisionPos, &Event.AdvisedDecision);
if (InlineSizeEstimatorAnalysis::isEvaluatorRequested())
L->logReward(Event.Reward);
// For debugging / later use
Effects.push_back(Event.Effect);
}
void TrainingLogger::print() {
std::error_code EC;
raw_fd_ostream OutFile(LogFileName, EC);
size_t NumberOfRecords = Rewards.size();
if (NumberOfRecords == 0)
return;
OutFile << "feature_lists: {\n";
for (size_t I = 0; I < Features.size(); ++I)
writeTensorsAsFeatureLists(
OutFile, TensorSpec::createSpec<int64_t>(FeatureNameMap.at(I), {1}),
Features[I].data(), NumberOfRecords);
writeTensorsAsFeatureLists(
OutFile, TensorSpec::createSpec<int64_t>(DefaultDecisionName, {1}),
DefaultDecisions.data(), NumberOfRecords);
writeRawTensorsAsFeatureLists(
OutFile, TensorSpec::createSpec<int64_t>(DecisionName, {1}),
Outputs[0].data(), NumberOfRecords);
if (InlineSizeEstimatorAnalysis::isEvaluatorRequested())
writeTensorsAsFeatureLists(OutFile,
TensorSpec::createSpec<int64_t>(RewardName, {1}),
Rewards.data(), NumberOfRecords);
for (size_t I = 1; I < Outputs.size(); ++I)
writeRawTensorsAsFeatureLists(OutFile, MUTR->outputSpecs()[I],
Outputs[I].data(), NumberOfRecords,
StringRef(MUTR->outputNames()[I]));
OutFile << "}\n";
L->print(OutFile);
}
DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(

View File

@@ -62,6 +62,82 @@ TFStatusPtr createTFStatus() {
TFSessionOptionsPtr createTFSessionOptions() {
return TFSessionOptionsPtr(TF_NewSessionOptions(), &TF_DeleteSessionOptions);
}
/// Render ElemCount values of element type T, starting at TensorData, as a
/// bracketed, comma-separated list on OutFile (e.g. "[1, 2, 3]").
template <typename T>
void writeTensorValues(raw_ostream &OutFile, const char *TensorData,
                       size_t ElemCount) {
  const T *Values = reinterpret_cast<const T *>(TensorData);
  OutFile << "[";
  // Separator trick: empty before the first element, ", " afterwards.
  const char *Sep = "";
  for (size_t Idx = 0; Idx < ElemCount; ++Idx) {
    OutFile << Sep << Values[Idx];
    Sep = ", ";
  }
  OutFile << "]";
}
/// Untyped implementation of writeTensorsAsFeatureLists (defined below):
/// write TensorCount tensors, stored contiguously as raw bytes in
/// TensorData, as one TensorFlow FeatureList text protobuf, one 'feature'
/// entry per tensor.
void writeRawTensorsAsFeatureLists(raw_ostream &OutFile,
const Logger::LoggedFeatureSpec &LoggedSpec,
const char *TensorData, size_t TensorCount) {
const char *FieldName = "<invalid>";
std::function<void(const char *)> ValueWriter;
const auto &Spec = LoggedSpec.Spec;
// The 'Feature' protobuf only has 3 possible fields: float_list,
// int64_list, or bytes_list, so we capture int32 values as int64. We don't
// support any other types.
if (Spec.isElementType<int64_t>()) {
FieldName = "int64_list";
ValueWriter = [&](const char *Data) {
writeTensorValues<int64_t>(OutFile, Data, Spec.getElementCount());
};
} else if (Spec.isElementType<int32_t>()) {
// int32 data is read as int32 but emitted under int64_list (see above).
FieldName = "int64_list";
ValueWriter = [&](const char *Data) {
writeTensorValues<int32_t>(OutFile, Data, Spec.getElementCount());
};
} else if (Spec.isElementType<float>()) {
FieldName = "float_list";
ValueWriter = [&](const char *Data) {
writeTensorValues<float>(OutFile, Data, Spec.getElementCount());
};
} else {
llvm_unreachable("Unsupported tensor type.");
}
// The printed key is the alternate LoggingName when one was provided,
// otherwise the tensor's own name.
OutFile << " feature_list: {\n";
OutFile << " key: "
<< "\""
<< (LoggedSpec.LoggingName ? *LoggedSpec.LoggingName : Spec.name())
<< "\" ";
OutFile << "value: {\n";
// Step through TensorData one tensor (TensorByteSize bytes) at a time.
size_t TensorByteSize = Spec.getElementCount() * Spec.getElementByteSize();
for (const char *P = TensorData,
*E = TensorData + TensorByteSize * TensorCount;
P < E; P += TensorByteSize) {
OutFile << " feature: { " << FieldName << ": { value: ";
ValueWriter(P);
OutFile << " } }\n";
}
OutFile << " }\n";
OutFile << " }\n";
}
/// Typed convenience wrapper over writeRawTensorsAsFeatureLists. The tensors
/// are assumed to be stored contiguously, in row-major format, in the
/// TensorData buffer, each with the shape given by Spec. The feature name in
/// the output is the provided LoggingName, if specified, otherwise the
/// tensor's name (as given by Spec).
template <typename T>
void writeTensorsAsFeatureLists(raw_ostream &OutFile,
                                const Logger::LoggedFeatureSpec &Spec,
                                const T *TensorData, size_t TensorCount) {
  const char *RawBytes = reinterpret_cast<const char *>(TensorData);
  writeRawTensorsAsFeatureLists(OutFile, Spec, RawBytes, TensorCount);
}
} // namespace
namespace llvm {
@@ -318,4 +394,27 @@ TFUTILS_SUPPORTED_TYPES(TFUTILS_GETDATATYPE_IMPL)
TFModelEvaluator::EvaluationResult::~EvaluationResult() {}
TFModelEvaluator::~TFModelEvaluator() {}
/// Print all logged events as a tf.train.SequenceExample text protobuf:
/// one FeatureList per feature, plus one for the reward when enabled.
void Logger::print(raw_ostream &OS) {
  // Guard FeatureSpecs[0] below: with no features configured (e.g. only a
  // reward), RawLogData may be non-empty yet FeatureSpecs empty, and the
  // original unchecked access would be undefined behavior.
  if (FeatureSpecs.empty())
    return;
  if (RawLogData.empty() || RawLogData[0].empty())
    return;
  // Every feature logs exactly one tensor per event, so the event count can
  // be recovered from the byte size of the first feature's raw buffer.
  size_t Tensor0Size = FeatureSpecs[0].Spec.getElementCount() *
                       FeatureSpecs[0].Spec.getElementByteSize();
  size_t NumberOfRecords = RawLogData[0].size() / Tensor0Size;
  if (NumberOfRecords == 0)
    return;
  OS << "feature_lists: {\n";
  for (size_t I = 0; I < FeatureSpecs.size(); ++I)
    writeTensorsAsFeatureLists(OS, FeatureSpecs[I], RawLogData[I].data(),
                               NumberOfRecords);
  // The reward, when logged, occupies the extra trailing RawLogData slot.
  if (IncludeReward)
    writeTensorsAsFeatureLists(OS, {RewardSpec, None}, RawLogData.back().data(),
                               NumberOfRecords);
  OS << "}\n";
}
#endif // defined(LLVM_HAVE_TF_API)

View File

@@ -42,19 +42,13 @@ define dso_local i32 @top() {
!1 = !{!"clang version 7.0.0-6 (tags/RELEASE_700/final)"}
; Check we produce a protobuf that has inlining decisions and rewards.
; CHECK: feature_lists: {
; CHECK-NOT: fake_extra_output
; EXTRA-OUTPUTS: key: "fake_extra_output" value: {
; EXTRA-OUTPUTS-NEXT: feature: { int64_list: { value: [1] } }
; CHECK: key: "inlining_decision" value: {
; CHECK-NEXT: feature: { int64_list: { value: [1] } }
; CHECK-NEXT: }
; CHECK-NEXT: }
; CHECK-NEXT: feature_list: {
; CHECK-NEXT: key: "delta_size" value: {
; CHECK: key: "delta_size" value: {
; CHECK-NEXT: feature: { int64_list: { value: [0] } }
; CHECK-NEXT: }
; CHECK-NEXT: }
; NOREWARD-NOT: key: "delta_size" value: {
; CHECK-NOT: fake_extra_output
; EXTRA-OUTPUTS: key: "fake_extra_output" value: {
; EXTRA-OUTPUTS-NEXT: feature: { int64_list: { value: [1] } }
; EXTRA-OUTPUTS-NEXT: }
; EXTRA-OUTPUTS-NEXT: }

View File

@@ -142,3 +142,89 @@ TEST(TFUtilsTest, TensorSpecSizesAndTypes) {
EXPECT_EQ(Spec3DLarge.getElementByteSize(), sizeof(float));
EXPECT_EQ(Spec1D.getElementByteSize(), sizeof(int16_t));
}
TEST(TFUtilsTest, Logger) {
  // Two features — a 2x3 float tensor and an int64 pair printed under an
  // alternate name — with reward logging enabled.
  std::vector<Logger::LoggedFeatureSpec> Features{
      {TensorSpec::createSpec<float>("the_float", {2, 3}), None},
      {TensorSpec::createSpec<int64_t>("the_int", {2}),
       std::string("alternate_name")}};
  auto Rewards = TensorSpec::createSpec<float>("reward", {1});
  Logger Log(Features, Rewards, true);
  // Event 0: log each feature once, then the reward.
  float Floats0[]{0.0, 0.1, 0.2, 0.3, 0.4, 0.5};
  int64_t Ints0[]{2, 3};
  Log.logTensorValue(0, Floats0, 6);
  Log.logTensorValue(1, Ints0, 2);
  Log.logReward<float>(3.4);
  // Event 1.
  float Floats1[]{0.0, 1.0, 2.0, 3.0, 4.0, 5.0};
  int64_t Ints1[]{-2, -3};
  Log.logTensorValue(0, Floats1, 6);
  Log.logTensorValue(1, Ints1, 2);
  Log.logReward<float>(-3.0);
  const auto *Expected = R"(feature_lists: {
feature_list: {
key: "the_float" value: {
feature: { float_list: { value: [0.000000e+00, 1.000000e-01, 2.000000e-01, 3.000000e-01, 4.000000e-01, 5.000000e-01] } }
feature: { float_list: { value: [0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00, 5.000000e+00] } }
}
}
feature_list: {
key: "alternate_name" value: {
feature: { int64_list: { value: [2, 3] } }
feature: { int64_list: { value: [-2, -3] } }
}
}
feature_list: {
key: "reward" value: {
feature: { float_list: { value: [3.400000e+00] } }
feature: { float_list: { value: [-3.000000e+00] } }
}
}
}
)";
  // Render into a string and compare against the expected protobuf text.
  std::string Result;
  raw_string_ostream OS(Result);
  Log.print(OS);
  EXPECT_EQ(Result, Expected);
}
TEST(TFUtilsTest, LoggerNoReward) {
  // Same two features as the Logger test, but with reward logging disabled:
  // no logReward calls, and no "reward" feature_list in the output.
  std::vector<Logger::LoggedFeatureSpec> Features{
      {TensorSpec::createSpec<float>("the_float", {2, 3}), None},
      {TensorSpec::createSpec<int64_t>("the_int", {2}),
       std::string("alternate_name")}};
  auto Rewards = TensorSpec::createSpec<float>("reward", {1});
  Logger Log(Features, Rewards, false);
  // Event 0.
  float Floats0[]{0.0, 0.1, 0.2, 0.3, 0.4, 0.5};
  int64_t Ints0[]{2, 3};
  Log.logTensorValue(0, Floats0, 6);
  Log.logTensorValue(1, Ints0, 2);
  // Event 1.
  float Floats1[]{0.0, 1.0, 2.0, 3.0, 4.0, 5.0};
  int64_t Ints1[]{-2, -3};
  Log.logTensorValue(0, Floats1, 6);
  Log.logTensorValue(1, Ints1, 2);
  const auto *Expected = R"(feature_lists: {
feature_list: {
key: "the_float" value: {
feature: { float_list: { value: [0.000000e+00, 1.000000e-01, 2.000000e-01, 3.000000e-01, 4.000000e-01, 5.000000e-01] } }
feature: { float_list: { value: [0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00, 5.000000e+00] } }
}
}
feature_list: {
key: "alternate_name" value: {
feature: { int64_list: { value: [2, 3] } }
feature: { int64_list: { value: [-2, -3] } }
}
}
}
)";
  // Render into a string and compare against the expected protobuf text.
  std::string Result;
  raw_string_ostream OS(Result);
  Log.print(OS);
  EXPECT_EQ(Result, Expected);
}