forked from OSchip/llvm-project
[NFC][MLGO] Make logging more robust

1) Add some self-diagnosis (when asserts are enabled) to check that all
   features have the same number of entries.
2) Avoid storing pointers to mutable fields, because the proto API contract
   doesn't actually guarantee those stay fixed even if no further mutation of
   the object occurs.

Differential Revision: https://reviews.llvm.org/D107594
This commit is contained in:
parent 6385abd0c4
commit ae1a2a09e4
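Point 2) is the behavioral heart of the patch: the old LoggerDataImpl built a
SequenceExample up front and cached raw FeatureList pointers obtained from its
mutable map, even though protobuf does not promise those addresses stay stable.
The new code owns the FeatureList objects and only transfers them into a
freshly built SequenceExample when flushing. Below is a minimal standalone
sketch of that own-then-Swap pattern; the helper name buildExample and the
surrounding setup are illustrative, not part of the patch.

#include <string>
#include <vector>
#include "tensorflow/core/example/example.pb.h"
#include "tensorflow/core/example/feature.pb.h"

// Illustrative sketch: accumulate data in FeatureList values we own, then
// move everything into a SequenceExample only at serialization time.
static tensorflow::SequenceExample
buildExample(const std::vector<std::string> &Names,
             std::vector<tensorflow::FeatureList> &Lists) {
  tensorflow::SequenceExample SE;
  auto *FL = SE.mutable_feature_lists()->mutable_feature_list();
  for (size_t I = 0; I < Names.size(); ++I)
    (*FL)[Names[I]].Swap(&Lists[I]); // cheap transfer, no cached pointers
  return SE;
}

This is why flush() in the diff below constructs the SequenceExample locally
and calls transferLog() rather than serializing a long-lived member.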
@@ -104,6 +104,9 @@ Optional<TensorSpec> getTensorSpecFromJSON(LLVMContext &Ctx,
 struct LoggedFeatureSpec {
   TensorSpec Spec;
   Optional<std::string> LoggingName;
+  const std::string &getLoggingName() const {
+    return LoggingName ? *LoggingName : Spec.name();
+  }
 };

 /// Load the output specs. If SpecFileOverride is not empty, that path is used.
@@ -170,7 +173,9 @@ public:
   // we can consider using bytes.
   char *addEntryAndGetFloatOrInt64Buffer(size_t FeatureID);

-  void print(raw_ostream &OS);
+  // Flush the content of the log to the stream, clearing the stored data in the
+  // process.
+  void flush(raw_ostream &OS);

 private:
   std::vector<LoggedFeatureSpec> FeatureSpecs;
@@ -377,7 +377,7 @@ void TrainingLogger::logInlineEvent(const InlineEvent &Event,
 void TrainingLogger::print() {
   std::error_code EC;
   raw_fd_ostream OutFile(LogFileName, EC);
-  L->print(OutFile);
+  L->flush(OutFile);
 }

 DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(
@@ -262,29 +262,57 @@ private:
 class LoggerDataImpl {
   const std::vector<LoggedFeatureSpec> LoggedFeatureSpecs;
   const TensorSpec RewardSpec;
+  const bool IncludeReward;

-  tensorflow::SequenceExample SE;
-  std::vector<tensorflow::FeatureList *> FeatureLists;
-  tensorflow::FeatureList *Reward = nullptr;
+  std::vector<tensorflow::FeatureList> FeatureLists;
+  tensorflow::FeatureList Reward;
+
+  bool isSelfConsistent(const tensorflow::SequenceExample &SE,
+                        size_t NrRecords) const {
+    bool Ret = true;
+    for (const auto &TSpecs : LoggedFeatureSpecs) {
+      const auto &Name = TSpecs.getLoggingName();
+      const auto &FL = SE.feature_lists().feature_list().at(Name).feature();
+      if (NrRecords != static_cast<size_t>(FL.size())) {
+        dbgs() << "[TF-UTILS]: " << Name << " has missing records. Expected "
+               << NrRecords << " got " << FL.size() << "\n";
+        Ret = false;
+      }
+    }
+    if (IncludeReward && static_cast<size_t>(SE.feature_lists()
+                                                 .feature_list()
+                                                 .at(RewardSpec.name())
+                                                 .feature()
+                                                 .size()) != NrRecords) {
+      dbgs() << "[TF-UTILS]: reward is missing records.\n";
+      Ret = false;
+    }
+    return Ret;
+  }
+
+  void transferLog(tensorflow::SequenceExample &SE) {
+    auto *FL = SE.mutable_feature_lists()->mutable_feature_list();
+    if (IncludeReward)
+      (*FL)[RewardSpec.name()].Swap(&Reward);
+    assert(FeatureLists.size() == LoggedFeatureSpecs.size());
+    for (size_t I = 0; I < FeatureLists.size(); ++I) {
+      const auto &LFS = LoggedFeatureSpecs[I];
+      (*FL)[LFS.getLoggingName()].Swap(&FeatureLists[I]);
+    }
+  }

 public:
   LoggerDataImpl(const std::vector<LoggedFeatureSpec> &LoggedSpecs,
                  const TensorSpec &RewardSpec, bool IncludeReward)
-      : LoggedFeatureSpecs(LoggedSpecs), RewardSpec(RewardSpec) {
-    auto *FL = SE.mutable_feature_lists()->mutable_feature_list();
-    if (IncludeReward)
-      Reward = &(*FL)[RewardSpec.name()];
-    // Allocate first the map entries, then capture their address. We will not
-    // mutate the set of features after this (i.e. the pointers won't dangle).
-    for (const auto &LFS : LoggedSpecs) {
-      (*FL)[LFS.LoggingName ? *LFS.LoggingName : LFS.Spec.name()] = {};
-    }
-    for (const auto &LFS : LoggedSpecs)
-      FeatureLists.push_back(
-          &(*FL)[LFS.LoggingName ? *LFS.LoggingName : LFS.Spec.name()]);
-  }
+      : LoggedFeatureSpecs(LoggedSpecs), RewardSpec(RewardSpec),
+        IncludeReward(IncludeReward), FeatureLists(LoggedFeatureSpecs.size()) {}

-  void print(raw_ostream &OS) {
+  // flush the logged info to a stream and clear the log contents.
+  void flush(raw_ostream &OS) {
+    size_t NrRecords = getNrRecords();
+    tensorflow::SequenceExample SE;
+    transferLog(SE);
+    assert(isSelfConsistent(SE, NrRecords));
     std::string OutStr;
     if (ProtobufTextMode)
       google::protobuf::TextFormat::PrintToString(SE, &OutStr);
@@ -298,14 +326,14 @@ public:
     const auto &Spec = LoggedFeatureSpecs[FeatureID].Spec;
     if (Spec.isElementType<float>()) {
       auto *RF = FeatureLists[FeatureID]
-                     ->add_feature()
+                     .add_feature()
                      ->mutable_float_list()
                      ->mutable_value();
       RF->Resize(Spec.getElementCount(), 0.0);
       return reinterpret_cast<char *>(RF->mutable_data());
     } else if (Spec.isElementType<int32_t>() || Spec.isElementType<int64_t>()) {
       auto *RF = FeatureLists[FeatureID]
-                     ->add_feature()
+                     .add_feature()
                      ->mutable_int64_list()
                      ->mutable_value();
       RF->Resize(Spec.getElementCount(), 0);
@@ -315,17 +343,18 @@ public:
   }

   template <typename T> void logReward(T Value) {
+    assert(IncludeReward);
     if (RewardSpec.isElementType<float>())
-      Reward->add_feature()->mutable_float_list()->add_value(Value);
+      Reward.add_feature()->mutable_float_list()->add_value(Value);
     else if (RewardSpec.isElementType<int32_t>() ||
              RewardSpec.isElementType<int64_t>())
-      Reward->add_feature()->mutable_int64_list()->add_value(Value);
+      Reward.add_feature()->mutable_int64_list()->add_value(Value);
     else
       llvm_unreachable("Unsupported tensor type.");
   }

   size_t getNrRecords() const {
-    return FeatureLists.empty() ? 0 : FeatureLists[0]->feature().size();
+    return FeatureLists.empty() ? 0 : FeatureLists[0].feature().size();
   }
 };
 } // namespace llvm
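Point 1) is implemented by isSelfConsistent() above: once the accumulated
lists are transferred into the SequenceExample, every feature list (and the
reward list, when present) must contain the same number of records. A
stripped-down sketch of that invariant, assuming an already-populated
SequenceExample; the helper name allSameLength is illustrative, not part of
the patch.

#include <cstddef>
#include "tensorflow/core/example/example.pb.h"

// Illustrative sketch: check that every logged feature list holds exactly
// NrRecords entries, mirroring what isSelfConsistent() asserts at flush time.
static bool allSameLength(const tensorflow::SequenceExample &SE,
                          size_t NrRecords) {
  for (const auto &KV : SE.feature_lists().feature_list())
    if (static_cast<size_t>(KV.second.feature().size()) != NrRecords)
      return false;
  return true;
}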
@@ -538,5 +567,5 @@ char *Logger::addEntryAndGetFloatOrInt64Buffer(size_t FeatureID) {
   return reinterpret_cast<char *>(LoggerData->addNewTensor(FeatureID));
 }

-void Logger::print(raw_ostream &OS) { LoggerData->print(OS); }
+void Logger::flush(raw_ostream &OS) { LoggerData->flush(OS); }
 #endif // defined(LLVM_HAVE_TF_API)
@@ -179,7 +179,7 @@ TEST(TFUtilsTest, Logger) {
   L.logFloatReward(-3.0);
   std::string Result;
   raw_string_ostream OS(Result);
-  L.print(OS);
+  L.flush(OS);

   tensorflow::SequenceExample Expected;
   EXPECT_TRUE(Expected.ParseFromString(Result));
@@ -215,7 +215,7 @@ TEST(TFUtilsTest, LoggerInt32FeaturesAndReward) {
   L.logInt32Reward(-3);
   std::string Result;
   raw_string_ostream OS(Result);
-  L.print(OS);
+  L.flush(OS);

   tensorflow::SequenceExample Expected;
   EXPECT_TRUE(Expected.ParseFromString(Result));
@@ -250,7 +250,7 @@ TEST(TFUtilsTest, LoggerNoReward) {

   std::string Result;
   raw_string_ostream OS(Result);
-  L.print(OS);
+  L.flush(OS);
   tensorflow::SequenceExample Expected;
   EXPECT_TRUE(Expected.ParseFromString(Result));
   PROTO_CHECKER("the_float", float_list, 0, F00);
@@ -274,7 +274,7 @@ TEST(TFUtilsTest, LoggerFinalReward) {
   L.logFloatFinalReward(3.14);
   std::string Result;
   raw_string_ostream OS(Result);
-  L.print(OS);
+  L.flush(OS);
   const float Zero[]{0.0};
   const float R[]{3.14};
   tensorflow::SequenceExample Expected;