forked from OSchip/llvm-project
[ML] Add final reward logging facility.
Allow logging a final reward. A final reward is logged only once; when the log is serialized, the reward is written as zero for every record except the last one, which carries the logged value. Differential Revision: https://reviews.llvm.org/D89626
This commit is contained in:
parent
7d8c19a4e9
commit
d454328ea8
|
@ -139,6 +139,11 @@ public:
|
|||
logTensorValue(RawLogData.size() - 1, &Value);
|
||||
}
|
||||
|
||||
/// Log a single, final reward value by forwarding it to logReward.
/// Asserts beforehand that the last entry of RawLogData is still empty.
/// NOTE(review): the last RawLogData entry appears to be the reward buffer
/// (print() derives the reward count from RawLogData.back()), so the assert
/// presumably enforces "no reward logged yet" - confirm against the class.
template <typename T> void logFinalReward(T Value) {
|
||||
// Precondition check only; it is compiled out when assertions are disabled.
assert(RawLogData.back().empty());
|
||||
logReward(Value);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void logTensorValue(size_t FeatureID, const T *Value, size_t Size = 1) {
|
||||
const char *Start = reinterpret_cast<const char *>(Value);
|
||||
|
|
|
@ -77,10 +77,15 @@ void writeTensorValues(raw_ostream &OutFile, const char *TensorData,
|
|||
OutFile << "]";
|
||||
}
|
||||
|
||||
/// Untyped implementation of the API above.
|
||||
/// Write a list of tensors as a sequence of TensorFlow FeatureList protobufs.
|
||||
/// The tensors are assumed to be stored contiguously, in row-major format,
|
||||
/// in the TensorData buffer. Each tensor has the shape given by Spec. The
|
||||
/// feature name in the output is either the provided LoggingName, if
|
||||
/// specified, otherwise it's the name of the tensor (as given by Spec).
|
||||
void writeRawTensorsAsFeatureLists(raw_ostream &OutFile,
|
||||
const Logger::LoggedFeatureSpec &LoggedSpec,
|
||||
const char *TensorData, size_t TensorCount) {
|
||||
const char *TensorData, size_t TensorCount,
|
||||
bool FinalReward = false) {
|
||||
const char *FieldName = "<invalid>";
|
||||
std::function<void(const char *)> ValueWriter;
|
||||
const auto &Spec = LoggedSpec.Spec;
|
||||
|
@ -115,29 +120,31 @@ void writeRawTensorsAsFeatureLists(raw_ostream &OutFile,
|
|||
<< "\" ";
|
||||
OutFile << "value: {\n";
|
||||
size_t TensorByteSize = Spec.getElementCount() * Spec.getElementByteSize();
|
||||
for (const char *P = TensorData,
|
||||
*E = TensorData + TensorByteSize * TensorCount;
|
||||
P < E; P += TensorByteSize) {
|
||||
|
||||
auto WriteFeatureProto = [&](const char *P) {
|
||||
OutFile << " feature: { " << FieldName << ": { value: ";
|
||||
ValueWriter(P);
|
||||
OutFile << " } }\n";
|
||||
};
|
||||
|
||||
const char *CurrentTensor = TensorData;
|
||||
static int64_t Zero = 0;
|
||||
// Write all but the last value. If this is the final reward, don't increment
|
||||
// the CurrentTensor, and just write 0.
|
||||
for (size_t I = 0; I < TensorCount - 1; ++I) {
|
||||
if (FinalReward)
|
||||
WriteFeatureProto(reinterpret_cast<const char *>(&Zero));
|
||||
else {
|
||||
WriteFeatureProto(CurrentTensor);
|
||||
CurrentTensor += TensorByteSize;
|
||||
}
|
||||
}
|
||||
|
||||
WriteFeatureProto(CurrentTensor);
|
||||
|
||||
OutFile << " }\n";
|
||||
OutFile << " }\n";
|
||||
}
|
||||
|
||||
/// Write a list of tensors as a sequence of TensorFlow FeatureList protobufs.
|
||||
/// The tensors are assumed to be stored contiguously, in row-major format,
|
||||
/// in the TensorData buffer. Each tensor has the shape given by Spec. The
|
||||
/// feature name in the output is either the provided LoggingName, if
|
||||
/// specified, otherwise it's the name of the tensor (as given by Spec).
|
||||
/// Typed convenience wrapper: reinterprets the T-typed TensorData pointer as a
/// raw byte pointer and forwards it, together with OutFile, Spec and
/// TensorCount, to writeRawTensorsAsFeatureLists.
template <typename T>
|
||||
void writeTensorsAsFeatureLists(raw_ostream &OutFile,
|
||||
const Logger::LoggedFeatureSpec &Spec,
|
||||
const T *TensorData, size_t TensorCount) {
|
||||
// The untyped implementation works on bytes; the element type is not passed
// along explicitly, only the Spec is.
writeRawTensorsAsFeatureLists(
|
||||
OutFile, Spec, reinterpret_cast<const char *>(TensorData), TensorCount);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
namespace llvm {
|
||||
|
@ -405,15 +412,19 @@ void Logger::print(raw_ostream &OS) {
|
|||
size_t NumberOfRecords = RawLogData[0].size() / Tensor0Size;
|
||||
if (NumberOfRecords == 0)
|
||||
return;
|
||||
size_t RewardSize =
|
||||
RewardSpec.getElementCount() * RewardSpec.getElementByteSize();
|
||||
size_t NumberOfRewards = RawLogData.back().size() / RewardSize;
|
||||
|
||||
OS << "feature_lists: {\n";
|
||||
for (size_t I = 0; I < FeatureSpecs.size(); ++I)
|
||||
writeTensorsAsFeatureLists(OS, FeatureSpecs[I], RawLogData[I].data(),
|
||||
NumberOfRecords);
|
||||
writeRawTensorsAsFeatureLists(OS, FeatureSpecs[I], RawLogData[I].data(),
|
||||
NumberOfRecords);
|
||||
|
||||
if (IncludeReward)
|
||||
writeTensorsAsFeatureLists(OS, {RewardSpec, None}, RawLogData.back().data(),
|
||||
NumberOfRecords);
|
||||
writeRawTensorsAsFeatureLists(OS, {RewardSpec, None},
|
||||
RawLogData.back().data(), NumberOfRecords,
|
||||
NumberOfRewards == 1);
|
||||
|
||||
OS << "}\n";
|
||||
}
|
||||
|
|
|
@ -227,4 +227,47 @@ TEST(TFUtilsTest, LoggerNoReward) {
|
|||
raw_string_ostream OS(Result);
|
||||
L.print(OS);
|
||||
EXPECT_EQ(Result, Expected);
|
||||
}
|
||||
}
|
||||
|
||||
// Checks the printed output when a reward is logged exactly once through
// logFinalReward after three feature records: the "reward" feature list is
// expected to hold one entry per record, zero for the first two and the logged
// value for the last.
TEST(TFUtilsTest, LoggerFinalReward) {
|
||||
std::vector<Logger::LoggedFeatureSpec> Features;
|
||||
Features.push_back({TensorSpec::createSpec<float>("the_float", {1}), None});
|
||||
Features.push_back({TensorSpec::createSpec<int64_t>("the_int", {1}), None});
|
||||
|
||||
auto Rewards = TensorSpec::createSpec<float>("reward", {1});
|
||||
// NOTE(review): the third argument is presumably the IncludeReward flag that
// print() checks - confirm against the Logger constructor.
Logger L(Features, Rewards, true);
|
||||
// Log three records: feature 0 receives I converted to float, feature 1
// receives I itself.
for (size_t I = 0; I < 3; ++I) {
|
||||
float F = static_cast<float>(I);
|
||||
L.logTensorValue(0, &F);
|
||||
// NOTE(review): I is a size_t but "the_int" is declared as int64_t; this
// assumes both have the same size on the test platforms - confirm.
L.logTensorValue(1, &I);
|
||||
}
|
||||
// 3.14 is a double literal; the explicit <float> template argument converts
// it to float at the call.
L.logFinalReward<float>(3.14);
|
||||
// Expected textual output of print() for the records logged above.
const auto *Expected = R"(feature_lists: {
|
||||
feature_list: {
|
||||
key: "the_float" value: {
|
||||
feature: { float_list: { value: [0.000000e+00] } }
|
||||
feature: { float_list: { value: [1.000000e+00] } }
|
||||
feature: { float_list: { value: [2.000000e+00] } }
|
||||
}
|
||||
}
|
||||
feature_list: {
|
||||
key: "the_int" value: {
|
||||
feature: { int64_list: { value: [0] } }
|
||||
feature: { int64_list: { value: [1] } }
|
||||
feature: { int64_list: { value: [2] } }
|
||||
}
|
||||
}
|
||||
feature_list: {
|
||||
key: "reward" value: {
|
||||
feature: { float_list: { value: [0.000000e+00] } }
|
||||
feature: { float_list: { value: [0.000000e+00] } }
|
||||
feature: { float_list: { value: [3.140000e+00] } }
|
||||
}
|
||||
}
|
||||
}
|
||||
)";
|
||||
std::string Result;
|
||||
raw_string_ostream OS(Result);
|
||||
L.print(OS);
|
||||
EXPECT_EQ(Result, Expected);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue