[ML] Add final reward logging facility.

Allow logging final rewards. A final reward is logged only once per trace;
when serialized, every entry in its feature list is zero except the last,
which holds the reward value.

Differential Revision: https://reviews.llvm.org/D89626
This commit is contained in:
Mircea Trofin 2020-10-17 08:40:44 -07:00
parent 7d8c19a4e9
commit d454328ea8
3 changed files with 82 additions and 23 deletions

View File

@ -139,6 +139,11 @@ public:
logTensorValue(RawLogData.size() - 1, &Value);
}
/// Log the one-and-only reward for the whole trace. Must be called at most
/// once, and only if no per-step reward was logged before: the assert checks
/// that the reward buffer is still empty. When printed, the reward feature
/// list comes out as all-zero entries except the last, which holds Value
/// (see the FinalReward handling in writeRawTensorsAsFeatureLists).
template <typename T> void logFinalReward(T Value) {
// RawLogData.back() is the reward buffer (it is what Logger::print pairs
// with RewardSpec). NOTE(review): assumes the reward buffer is the last
// entry of RawLogData -- confirm against the class definition, which is
// not visible in this chunk.
assert(RawLogData.back().empty());
logReward(Value);
}
template <typename T>
void logTensorValue(size_t FeatureID, const T *Value, size_t Size = 1) {
const char *Start = reinterpret_cast<const char *>(Value);

View File

@ -77,10 +77,15 @@ void writeTensorValues(raw_ostream &OutFile, const char *TensorData,
OutFile << "]";
}
/// Untyped implementation of the API above.
/// Write a list of tensors as a sequence of TensorFlow FeatureList protobufs.
/// The tensors are assumed to be stored contiguously, in row-major format,
/// in the TensorData buffer. Each tensor has the shape given by Spec. The
/// feature name in the output is either the provided LoggingName, if
/// specified, otherwise it's the name of the tensor (as given by Spec).
// NOTE(review): this span is a diff rendering, not compilable code -- both
// the pre-change and post-change signature lines appear below without +/-
// markers, the "@ -115,29 ..." hunk header marks elided lines, and the
// removed pre-change loop (with its closing brace) is still shown.
/// Serialize TensorCount tensors, stored contiguously in TensorData, as one
/// TensorFlow FeatureList. If FinalReward is true, every entry except the
/// last is written as zero and only the final entry carries the real value.
void writeRawTensorsAsFeatureLists(raw_ostream &OutFile,
const Logger::LoggedFeatureSpec &LoggedSpec,
// Pre-change signature line, kept by the diff rendering:
const char *TensorData, size_t TensorCount) {
// Post-change signature: FinalReward defaults to false, so existing
// callers keep their behavior.
const char *TensorData, size_t TensorCount,
bool FinalReward = false) {
// FieldName / ValueWriter are chosen per element type; that switch is in
// the lines elided by the hunk header below.
const char *FieldName = "<invalid>";
std::function<void(const char *)> ValueWriter;
const auto &Spec = LoggedSpec.Spec;
@ -115,29 +120,31 @@ void writeRawTensorsAsFeatureLists(raw_ostream &OutFile,
<< "\" ";
OutFile << "value: {\n";
// Byte size of one serialized tensor record.
size_t TensorByteSize = Spec.getElementCount() * Spec.getElementByteSize();
// Pre-change iteration (removed by this commit; retained by the diff view):
for (const char *P = TensorData,
*E = TensorData + TensorByteSize * TensorCount;
P < E; P += TensorByteSize) {
// Emit one `feature: { ... }` proto for the tensor starting at P.
auto WriteFeatureProto = [&](const char *P) {
OutFile << " feature: { " << FieldName << ": { value: ";
ValueWriter(P);
OutFile << " } }\n";
};
const char *CurrentTensor = TensorData;
static int64_t Zero = 0;
// Write all but the last value. If this is the final reward, don't increment
// the CurrentTensor, and just write 0.
// NOTE(review): `TensorCount - 1` underflows for TensorCount == 0; the
// visible caller guards with NumberOfRecords == 0 -- confirm all call sites.
for (size_t I = 0; I < TensorCount - 1; ++I) {
if (FinalReward)
WriteFeatureProto(reinterpret_cast<const char *>(&Zero));
else {
WriteFeatureProto(CurrentTensor);
CurrentTensor += TensorByteSize;
}
}
// The last entry always carries the real value (the final reward, or the
// last regularly-logged tensor).
WriteFeatureProto(CurrentTensor);
OutFile << " }\n";
OutFile << " }\n";
}
/// Write a list of tensors as a sequence of TensorFlow FeatureList protobufs.
/// The tensors are assumed to be stored contiguously, in row-major format,
/// in the TensorData buffer. Each tensor has the shape given by Spec. The
/// feature name in the output is either the provided LoggingName, if
/// specified, otherwise it's the name of the tensor (as given by Spec).
template <typename T>
void writeTensorsAsFeatureLists(raw_ostream &OutFile,
const Logger::LoggedFeatureSpec &Spec,
const T *TensorData, size_t TensorCount) {
writeRawTensorsAsFeatureLists(
OutFile, Spec, reinterpret_cast<const char *>(TensorData), TensorCount);
}
} // namespace
namespace llvm {
@ -405,15 +412,19 @@ void Logger::print(raw_ostream &OS) {
// NOTE(review): diff rendering -- the opening lines of Logger::print are
// elided above, and both the pre-change (writeTensorsAsFeatureLists) and
// post-change (writeRawTensorsAsFeatureLists) call sites appear below
// without +/- markers; only the Raw variants exist in the new code.
// Number of logged steps, inferred from feature 0's buffer size.
size_t NumberOfRecords = RawLogData[0].size() / Tensor0Size;
if (NumberOfRecords == 0)
return;
size_t RewardSize =
RewardSpec.getElementCount() * RewardSpec.getElementByteSize();
// How many reward entries were actually logged; a single entry for many
// records means logFinalReward was used.
size_t NumberOfRewards = RawLogData.back().size() / RewardSize;
OS << "feature_lists: {\n";
for (size_t I = 0; I < FeatureSpecs.size(); ++I)
// Pre-change call site (retained by the diff view):
writeTensorsAsFeatureLists(OS, FeatureSpecs[I], RawLogData[I].data(),
NumberOfRecords);
writeRawTensorsAsFeatureLists(OS, FeatureSpecs[I], RawLogData[I].data(),
NumberOfRecords);
if (IncludeReward)
// Pre-change call site (retained by the diff view):
writeTensorsAsFeatureLists(OS, {RewardSpec, None}, RawLogData.back().data(),
NumberOfRecords);
// FinalReward == (NumberOfRewards == 1): expand the single final reward
// to NumberOfRecords entries, all zero except the last.
writeRawTensorsAsFeatureLists(OS, {RewardSpec, None},
RawLogData.back().data(), NumberOfRecords,
NumberOfRewards == 1);
OS << "}\n";
}

View File

@ -227,4 +227,47 @@ TEST(TFUtilsTest, LoggerNoReward) {
raw_string_ostream OS(Result);
L.print(OS);
EXPECT_EQ(Result, Expected);
}
}
// Verify that a single logFinalReward call serializes the "reward" feature
// list as one zero entry per logged step, except the last entry, which
// carries the final reward value (3.14).
TEST(TFUtilsTest, LoggerFinalReward) {
std::vector<Logger::LoggedFeatureSpec> Features;
Features.push_back({TensorSpec::createSpec<float>("the_float", {1}), None});
Features.push_back({TensorSpec::createSpec<int64_t>("the_int", {1}), None});
auto Rewards = TensorSpec::createSpec<float>("reward", {1});
// Third ctor argument enables reward serialization (IncludeReward).
Logger L(Features, Rewards, true);
// Log three observation steps; no per-step rewards are logged.
for (size_t I = 0; I < 3; ++I) {
float F = static_cast<float>(I);
L.logTensorValue(0, &F);
L.logTensorValue(1, &I);
}
// One reward for the whole trace; expected to expand to [0, 0, 3.14].
L.logFinalReward<float>(3.14);
const auto *Expected = R"(feature_lists: {
feature_list: {
key: "the_float" value: {
feature: { float_list: { value: [0.000000e+00] } }
feature: { float_list: { value: [1.000000e+00] } }
feature: { float_list: { value: [2.000000e+00] } }
}
}
feature_list: {
key: "the_int" value: {
feature: { int64_list: { value: [0] } }
feature: { int64_list: { value: [1] } }
feature: { int64_list: { value: [2] } }
}
}
feature_list: {
key: "reward" value: {
feature: { float_list: { value: [0.000000e+00] } }
feature: { float_list: { value: [0.000000e+00] } }
feature: { float_list: { value: [3.140000e+00] } }
}
}
}
)";
std::string Result;
raw_string_ostream OS(Result);
L.print(OS);
EXPECT_EQ(Result, Expected);
}