[MD] sampler log update

This commit is contained in:
YangLuo 2020-09-29 14:29:33 +08:00
parent 86844a6a95
commit c36a36304c
4 changed files with 26 additions and 12 deletions

View File

@ -76,7 +76,10 @@ Status DistributedSampler::InitSampler() {
Status DistributedSampler::GetNextSample(std::unique_ptr<DataBuffer> *out_buffer) {
if (cnt_ > samples_per_buffer_) {
RETURN_STATUS_UNEXPECTED("Distributed Sampler Error");
RETURN_STATUS_UNEXPECTED(
"Number of samples(cnt) that have already been filled in to buffer should be less than or "
"equal to samples_per_buffer, but got cnt: " +
std::to_string(cnt_) + ", samples_per_buffer: " + std::to_string(samples_per_buffer_));
} else if (cnt_ == samples_per_buffer_ && (non_empty_ || !even_dist_)) {
(*out_buffer) = std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE);
if (!samples_per_buffer_) {

View File

@ -53,7 +53,8 @@ Status PythonSampler::GetNextSample(std::unique_ptr<DataBuffer> *out_buffer) {
} catch (const py::error_already_set &e) {
return Status(StatusCode::kPyFuncException, e.what());
} catch (const py::cast_error &e) {
return Status(StatusCode::kPyFuncException, "Python Sampler iterator should return integer index");
return Status(StatusCode::kPyFuncException,
"Invalid data, python sampler iterator should return an integer index.");
}
}
TensorRow row(1, sample_ids);
@ -64,7 +65,8 @@ Status PythonSampler::GetNextSample(std::unique_ptr<DataBuffer> *out_buffer) {
}
Status PythonSampler::InitSampler() {
CHECK_FAIL_RETURN_UNEXPECTED(num_rows_ > 0, "ERROR num_rows_ should be greater than 0");
CHECK_FAIL_RETURN_UNEXPECTED(
num_rows_ > 0, "Invalid parameter, num_rows must be greater than 0, but got " + std::to_string(num_rows_));
// Special value of 0 for num_samples means that the user wants to sample the entire set of data.
// If the user asked to sample more rows than exist in the dataset, adjust the num_samples accordingly.
if (num_samples_ == 0 || num_samples_ > num_rows_) {

View File

@ -98,9 +98,8 @@ Status SubsetRandomSampler::GetNextSample(std::unique_ptr<DataBuffer> *out_buffe
auto id_ptr = outputIds->begin<int64_t>();
while (sample_id_ < last_id) {
if (indices_[sample_id_] >= num_rows_) {
std::string err_msg =
"Generated id is bigger than numRows (out of bound). indices_: " + std::to_string(indices_[sample_id_]) +
" num_rows_: " + std::to_string(num_rows_);
std::string err_msg = "Generated indice is out of bound, expect range [0, num_data-1], got indice: " +
std::to_string(indices_[sample_id_]) + ", num_data: " + std::to_string(num_rows_ - 1);
RETURN_STATUS_UNEXPECTED(err_msg);
}

View File

@ -50,11 +50,15 @@ Status WeightedRandomSampler::InitSampler() {
std::to_string(samples_per_buffer_) + ".\n");
if (weights_.size() > static_cast<size_t>(num_rows_)) {
return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__,
"Invalid parameter, number of samples weights is more than num of rows. "
"Might generate id out of bound OR other errors");
"Invalid parameter, size of sample weights must be less than or equal to num of data, "
"otherwise might cause generated id out of bound or other errors, but got weight size: " +
std::to_string(weights_.size()) + ", num of data: " + std::to_string(num_rows_));
}
if (!replacement_ && (weights_.size() < static_cast<size_t>(num_samples_))) {
RETURN_STATUS_UNEXPECTED("Invalid parameter, without replacement, weights size must be greater than num_samples.");
RETURN_STATUS_UNEXPECTED(
"Invalid parameter, without replacement, weights size must be greater than or equal to num_samples, "
"but got weight size: " +
std::to_string(weights_.size()) + ", num_samples: " + std::to_string(num_samples_));
}
// Initialize random generator with seed from config manager
@ -110,11 +114,16 @@ Status WeightedRandomSampler::ResetSampler() {
Status WeightedRandomSampler::GetNextSample(std::unique_ptr<DataBuffer> *out_buffer) {
if (weights_.size() > static_cast<size_t>(num_rows_)) {
return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__,
"number of samples weights is more than num of rows. Might generate id out of bound OR other errors");
"Invalid parameter, size of sample weights must be less than or equal to num of data, "
"otherwise might cause generated id out of bound or other errors, but got weight size: " +
std::to_string(weights_.size()) + ", num of data: " + std::to_string(num_rows_));
}
if (!replacement_ && (weights_.size() < static_cast<size_t>(num_samples_))) {
RETURN_STATUS_UNEXPECTED("Without replacement, sample weights less than numSamples");
RETURN_STATUS_UNEXPECTED(
"Invalid parameter, without replacement, weights size must be greater than or equal to num_samples, "
"but got weight size: " +
std::to_string(weights_.size()) + ", num_samples: " + std::to_string(num_samples_));
}
if (sample_id_ == num_samples_) {
@ -150,7 +159,8 @@ Status WeightedRandomSampler::GetNextSample(std::unique_ptr<DataBuffer> *out_buf
}
if (genId >= num_rows_) {
RETURN_STATUS_UNEXPECTED("generated id is bigger than numRows (out of bound).");
RETURN_STATUS_UNEXPECTED("Generated indice is out of bound, expect range [0, num_data-1], got indice: " +
std::to_string(genId) + ", num_data: " + std::to_string(num_rows_ - 1));
}
if (HasChildSampler()) {