forked from mindspore-Ecosystem/mindspore
!21389 Add Tensor Base and Stat info to offline debugger
Merge pull request !21389 from parastooashtari/tensor_info_levels
This commit is contained in commit 2edaba38bf
@@ -131,6 +131,30 @@ std::unique_ptr<ITensorSummary> GetSummaryPtr(const std::shared_ptr<TensorData>
  }
}

DebugServices::TensorStat DebugServices::GetTensorStatistics(const std::shared_ptr<TensorData> &tensor) {
  if (tensor == nullptr) {
    MS_LOG(WARNING) << "Tensor is nullptr, returning empty tensor statistics.";
    TensorStat empty_tensor_stat_data;
    return empty_tensor_stat_data;
  }
  std::unique_ptr<ITensorSummary> base_summary_ptr;
  void *previous_tensor_ptr = nullptr;
  base_summary_ptr = GetSummaryPtr(tensor, previous_tensor_ptr, tensor->GetNumElements(), tensor->GetType());
  if (base_summary_ptr == nullptr) {
    MS_LOG(WARNING) << "base_summary_ptr is nullptr, returning empty tensor statistics.";
    TensorStat empty_tensor_stat_data;
    return empty_tensor_stat_data;
  }
  base_summary_ptr->TensorStatistics(tensor->GetType());
  TensorStat tensor_stat_data(tensor->GetByteSize(), tensor->GetType(), tensor->GetShape(), base_summary_ptr->is_bool(),
                              base_summary_ptr->max_value(), base_summary_ptr->min_value(),
                              base_summary_ptr->avg_value(), base_summary_ptr->count(),
                              base_summary_ptr->neg_zero_count(), base_summary_ptr->pos_zero_count(),
                              base_summary_ptr->nan_count(), base_summary_ptr->neg_inf_count(),
                              base_summary_ptr->pos_inf_count(), base_summary_ptr->zero_count());

  return tensor_stat_data;
}
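For orientation only, and not part of this diff: a minimal sketch of how the values packaged by GetTensorStatistics surface through the offline-debugger Python API added later in this change. The import path, dump path and node name are placeholders taken from the docstring examples below.

# Hypothetical end-to-end use of the statistics path introduced in this commit.
from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services

d = dbg_services.DbgServices(dump_file_path="dump_file_path", verbose=True)
d_init = d.initialize(is_sync_mode=True)
info = dbg_services.TensorInfo(node_name="conv2.bias", slot=0, iteration=8,
                               rank_id=5, root_graph_id=0, is_output=True)
# read_tensor_stats -> DbgServices::ReadTensorsStat -> DebugServices::GetTensorStatistics
stats = d_init.read_tensor_stats([info])
print(stats[0].max_value, stats[0].min_value, stats[0].avg_value, stats[0].nan_count)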

#ifdef OFFLINE_DBG_MODE
void *DebugServices::GetPrevTensor(const std::shared_ptr<TensorData> &tensor, bool previous_iter_tensor_needed) {
  void *previous_tensor_ptr = nullptr;

@@ -317,7 +341,11 @@ void DebugServices::CheckWatchpoints(std::vector<std::string> *const name, std::
  MS_LOG(INFO) << "tensor list size: " << tensor_list_size;
  if (tensor_list_size == 0) return;
  // default value for number of threads
  const int max_thread_num = 32;
  const int default_thread_num = 32;
  int max_thread_num = default_thread_num;
  if (max_thread_num > tensor_list_size) {
    max_thread_num = tensor_list_size;
  }
  MS_LOG(INFO) << "Number of threads used for checkwatchpoint: " << max_thread_num;
  int chunk_size = tensor_list_size / max_thread_num;
  int remainder = tensor_list_size % max_thread_num;
@ -757,78 +785,100 @@ void DebugServices::ReadDumpedTensor(std::vector<std::string> backend_name, std:
|
|||
std::to_string(root_graph_id[i]) + "/" + IterationString(iteration[i]);
|
||||
|
||||
// search files in dir for the one that meets the filename prefix and read the file into memory
|
||||
std::vector<char> *buffer = NULL;
|
||||
std::string type_name = "";
|
||||
std::vector<int64_t> shape;
|
||||
uint64_t data_size = 0;
|
||||
if (is_sync_mode_) {
|
||||
std::string abspath = RealPath(specific_dump_dir);
|
||||
DIR *d = opendir(abspath.c_str());
|
||||
bool found_file = false;
|
||||
std::vector<std::string> matched_paths;
|
||||
if (d == nullptr) {
|
||||
MS_LOG(ERROR) << "Directory " << specific_dump_dir << " does not exist!";
|
||||
} else {
|
||||
struct dirent *dir = nullptr;
|
||||
while ((dir = readdir(d)) != NULL) {
|
||||
if (dir->d_type == DT_REG) {
|
||||
std::string file_name = dir->d_name;
|
||||
std::string stripped_file_name = GetStrippedFilename(file_name);
|
||||
if (stripped_file_name.empty()) {
|
||||
continue;
|
||||
}
|
||||
std::size_t found = stripped_file_name.rfind(prefix_dump_file_name, 0);
|
||||
if (found != 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
std::string full_path = specific_dump_dir + "/" + file_name;
|
||||
matched_paths.push_back(full_path);
|
||||
found_file = true;
|
||||
}
|
||||
}
|
||||
(void)closedir(d);
|
||||
}
|
||||
|
||||
if (found_file) {
|
||||
shape.clear();
|
||||
std::string result_path = GetNewestFilePath(matched_paths);
|
||||
ReadTensorFromNpy(result_path, &type_name, &data_size, &shape, &buffer);
|
||||
AddToTensorData(backend_name[i], slot[i], iteration[i], device_id[i], root_graph_id[i], is_output[i], data_size,
|
||||
type_name, shape, buffer, result_list);
|
||||
} else {
|
||||
AddToTensorData(backend_name[i], slot[i], iteration[i], device_id[i], root_graph_id[i], is_output[i], 0,
|
||||
type_name, shape, buffer, result_list);
|
||||
MS_LOG(INFO) << "Target tensor has not been found.";
|
||||
}
|
||||
ReadDumpedTensorSync(prefix_dump_file_name, specific_dump_dir, backend_name[i], slot[i], device_id[i],
|
||||
iteration[i], root_graph_id[i], is_output[i], result_list);
|
||||
} else {
|
||||
bool found = false;
|
||||
std::vector<std::string> matched_paths;
|
||||
// if async mode
|
||||
for (const std::string &file_path : async_file_pool) {
|
||||
if (file_path.find(specific_dump_dir) != std::string::npos &&
|
||||
file_path.find(prefix_dump_to_check) != std::string::npos &&
|
||||
file_path.find(slot_string_to_check) != std::string::npos) {
|
||||
matched_paths.push_back(file_path);
|
||||
found = true;
|
||||
}
|
||||
}
|
||||
if (found) {
|
||||
shape.clear();
|
||||
std::string result_path = GetNewestFilePath(matched_paths);
|
||||
ReadTensorFromNpy(result_path, &type_name, &data_size, &shape, &buffer);
|
||||
AddToTensorData(backend_name[i], slot[i], iteration[i], device_id[i], root_graph_id[i], is_output[i], data_size,
|
||||
type_name, shape, buffer, result_list);
|
||||
} else {
|
||||
// If no npy file is found, add empty tensor data.
|
||||
AddToTensorData(backend_name[i], slot[i], iteration[i], device_id[i], root_graph_id[i], is_output[i], 0,
|
||||
type_name, shape, buffer, result_list);
|
||||
MS_LOG(INFO) << "Target tensor has not been found.";
|
||||
}
|
||||
ReadDumpedTensorAsync(specific_dump_dir, prefix_dump_to_check, slot_string_to_check, backend_name[i], slot[i],
|
||||
device_id[i], iteration[i], root_graph_id[i], is_output[i], async_file_pool, result_list);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void DebugServices::ReadDumpedTensorSync(const std::string &prefix_dump_file_name, const std::string &specific_dump_dir,
|
||||
const std::string &backend_name, size_t slot, unsigned int device_id,
|
||||
unsigned int iteration, unsigned int root_graph_id, const bool &is_output,
|
||||
std::vector<std::shared_ptr<TensorData>> *result_list) {
|
||||
std::vector<char> *buffer = NULL;
|
||||
std::string type_name = "";
|
||||
std::vector<int64_t> shape;
|
||||
uint64_t data_size = 0;
|
||||
std::string abspath = RealPath(specific_dump_dir);
|
||||
DIR *d = opendir(abspath.c_str());
|
||||
bool found_file = false;
|
||||
std::vector<std::string> matched_paths;
|
||||
if (d == nullptr) {
|
||||
MS_LOG(ERROR) << "Directory " << specific_dump_dir << " does not exist!";
|
||||
return;
|
||||
}
|
||||
struct dirent *dir = nullptr;
|
||||
while ((dir = readdir(d)) != NULL) {
|
||||
if (dir->d_type == DT_REG) {
|
||||
std::string file_name = dir->d_name;
|
||||
std::string stripped_file_name = GetStrippedFilename(file_name);
|
||||
if (stripped_file_name.empty()) {
|
||||
continue;
|
||||
}
|
||||
std::size_t found = stripped_file_name.rfind(prefix_dump_file_name, 0);
|
||||
if (found != 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
std::string full_path = specific_dump_dir + "/" + file_name;
|
||||
matched_paths.push_back(full_path);
|
||||
found_file = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (found_file) {
|
||||
shape.clear();
|
||||
std::string result_path = GetNewestFilePath(matched_paths);
|
||||
ReadTensorFromNpy(result_path, &type_name, &data_size, &shape, &buffer);
|
||||
AddToTensorData(backend_name, slot, iteration, device_id, root_graph_id, is_output, data_size, type_name, shape,
|
||||
buffer, result_list);
|
||||
} else {
|
||||
AddToTensorData(backend_name, slot, iteration, device_id, root_graph_id, is_output, 0, type_name, shape, buffer,
|
||||
result_list);
|
||||
MS_LOG(INFO) << "Target tensor has not been found.";
|
||||
}
|
||||
(void)closedir(d);
|
||||
}
|
||||
|
||||
void DebugServices::ReadDumpedTensorAsync(const std::string &specific_dump_dir, const std::string &prefix_dump_to_check,
|
||||
const std::string &slot_string_to_check, const std::string &backend_name,
|
||||
size_t slot, unsigned int device_id, unsigned int iteration,
|
||||
unsigned int root_graph_id, const bool &is_output,
|
||||
const std::vector<std::string> &async_file_pool,
|
||||
std::vector<std::shared_ptr<TensorData>> *result_list) {
|
||||
std::vector<char> *buffer = NULL;
|
||||
std::string type_name = "";
|
||||
std::vector<int64_t> shape;
|
||||
uint64_t data_size = 0;
|
||||
bool found = false;
|
||||
std::vector<std::string> matched_paths;
|
||||
// if async mode
|
||||
for (const std::string &file_path : async_file_pool) {
|
||||
if (file_path.find(specific_dump_dir) != std::string::npos &&
|
||||
file_path.find(prefix_dump_to_check) != std::string::npos &&
|
||||
file_path.find(slot_string_to_check) != std::string::npos) {
|
||||
matched_paths.push_back(file_path);
|
||||
found = true;
|
||||
}
|
||||
}
|
||||
if (found) {
|
||||
shape.clear();
|
||||
std::string result_path = GetNewestFilePath(matched_paths);
|
||||
ReadTensorFromNpy(result_path, &type_name, &data_size, &shape, &buffer);
|
||||
AddToTensorData(backend_name, slot, iteration, device_id, root_graph_id, is_output, data_size, type_name, shape,
|
||||
buffer, result_list);
|
||||
} else {
|
||||
// If no npy file is found, add empty tensor data.
|
||||
AddToTensorData(backend_name, slot, iteration, device_id, root_graph_id, is_output, 0, type_name, shape, buffer,
|
||||
result_list);
|
||||
MS_LOG(INFO) << "Target tensor has not been found.";
|
||||
}
|
||||
}
|
||||
|
||||
std::string DebugServices::GetStrippedFilename(const std::string &file_name) {
|
||||
// strip off the task_id, stream_id, and timestamp, then compare
|
||||
size_t first_dot = file_name.find(".");
|
||||
|
|
|

@@ -186,6 +186,45 @@ class DebugServices {
    }
  };

  struct TensorStat {
    TensorStat(uint64_t data_size, int dtype, const std::vector<int64_t> &shape, bool is_bool, double max_value,
               double min_value, double avg_value, int count, int neg_zero_count, int pos_zero_count, int nan_count,
               int neg_inf_count, int pos_inf_count, int zero_count)
        : data_size(data_size),
          dtype(dtype),
          shape(shape),
          is_bool(is_bool),
          max_value(max_value),
          min_value(min_value),
          avg_value(avg_value),
          count(count),
          neg_zero_count(neg_zero_count),
          pos_zero_count(pos_zero_count),
          nan_count(nan_count),
          neg_inf_count(neg_inf_count),
          pos_inf_count(pos_inf_count),
          zero_count(zero_count) {}

    TensorStat() = default;

    uint64_t data_size = 0;
    int dtype = 0;
    std::vector<int64_t> shape = {0};
    bool is_bool = false;
    double max_value = std::numeric_limits<double>::lowest();
    double min_value = std::numeric_limits<double>::max();
    double avg_value = 0.0;
    int count = 0;
    int neg_zero_count = 0;
    int pos_zero_count = 0;
    int nan_count = 0;
    int neg_inf_count = 0;
    int pos_inf_count = 0;
    int zero_count = 0;
  };

  TensorStat GetTensorStatistics(const std::shared_ptr<TensorData> &tensor);

  void AddWatchpoint(
    unsigned int id, unsigned int watch_condition, float parameter,
    const std::vector<std::tuple<std::string, bool>> &check_node_list, const std::vector<parameter_t> &parameter_list,

@@ -233,6 +272,17 @@ class DebugServices {
                        const std::vector<std::string> &async_file_pool,
                        std::vector<std::shared_ptr<TensorData>> *result_list);

  void ReadDumpedTensorSync(const std::string &prefix_dump_file_name, const std::string &specific_dump_dir,
                            const std::string &backend_name, size_t slot, unsigned int device_id,
                            unsigned int iteration, unsigned int root_graph_id, const bool &is_output,
                            std::vector<std::shared_ptr<TensorData>> *result_list);

  void ReadDumpedTensorAsync(const std::string &specific_dump_dir, const std::string &prefix_dump_to_check,
                             const std::string &slot_string_to_check, const std::string &backend_name, size_t slot,
                             unsigned int device_id, unsigned int iteration, unsigned int root_graph_id,
                             const bool &is_output, const std::vector<std::string> &async_file_pool,
                             std::vector<std::shared_ptr<TensorData>> *result_list);

  std::vector<std::shared_ptr<TensorData>> ReadNeededDumpedTensors(unsigned int iteration,
                                                                   std::vector<std::string> *async_file_pool);

@@ -226,7 +226,7 @@ unsigned int GetTensorSlot(tensor_info_t info) { return info.slot; }

bool GetTensorIsOutput(tensor_info_t info) { return info.is_output; }

std::vector<tensor_data_t> DbgServices::ReadTensors(std::vector<tensor_info_t> info) {
std::vector<std::shared_ptr<TensorData>> DbgServices::ReadTensorsUtil(std::vector<tensor_info_t> info) {
  for (auto i : info) {
    MS_LOG(INFO) << "cpp DbgServices ReadTensor info name " << i.node_name << ", slot " << i.slot << ", iteration "
                 << i.iteration << ", rank_id " << i.rank_id << ", root_graph_id " << i.root_graph_id << ", is_output "

@@ -238,7 +238,6 @@ std::vector<tensor_data_t> DbgServices::ReadTensors(std::vector<tensor_info_t> i
  std::vector<unsigned int> iteration;
  std::vector<size_t> slot;
  std::vector<std::shared_ptr<TensorData>> result_list;
  std::vector<tensor_data_t> tensors_read;
  std::vector<bool> is_output;

  std::transform(info.begin(), info.end(), std::back_inserter(backend_name), GetTensorFullName);

@@ -264,10 +263,60 @@ std::vector<tensor_data_t> DbgServices::ReadTensors(std::vector<tensor_info_t> i
  MS_LOG(INFO) << "ReadTensors Took: " << ms_double.count() / 1000 << "s";
  MS_LOG(INFO) << "cpp after";

  return result_list;
}

std::vector<tensor_data_t> DbgServices::ReadTensors(std::vector<tensor_info_t> info) {
  std::vector<tensor_data_t> tensors_read;
  std::vector<std::shared_ptr<TensorData>> result_list;
  result_list = ReadTensorsUtil(info);
  for (auto result : result_list) {
    tensor_data_t tensor_data_item(result->GetDataPtr(), result->GetByteSize(), result->GetType(), result->GetShape());
    tensors_read.push_back(tensor_data_item);
  }
  MS_LOG(INFO) << "cpp end";
  return tensors_read;
}

std::vector<TensorBaseData> DbgServices::ReadTensorsBase(std::vector<tensor_info_t> info) {
  std::vector<TensorBaseData> tensors_read_base;
  std::vector<std::shared_ptr<TensorData>> result_list;
  result_list = ReadTensorsUtil(info);
  for (auto result : result_list) {
    if (!result->GetByteSize()) {
      // tensor not found, adding empty tensor base.
      TensorBaseData tensor_data_item(0, 0, {0});
      tensors_read_base.push_back(tensor_data_item);
      continue;
    }
    TensorBaseData tensor_data_item(result->GetByteSize(), result->GetType(), result->GetShape());
    tensors_read_base.push_back(tensor_data_item);
  }
  return tensors_read_base;
}

std::vector<TensorStatData> DbgServices::ReadTensorsStat(std::vector<tensor_info_t> info) {
  std::vector<TensorStatData> tensors_read_stat;
  std::vector<std::shared_ptr<TensorData>> result_list;
  result_list = ReadTensorsUtil(info);
  for (auto result : result_list) {
    if (!result->GetByteSize()) {
      DebugServices::TensorStat tensor_statistics;
      TensorStatData tensor_data_item(
        tensor_statistics.data_size, tensor_statistics.dtype, tensor_statistics.shape, tensor_statistics.is_bool,
        tensor_statistics.max_value, tensor_statistics.min_value, tensor_statistics.avg_value, tensor_statistics.count,
        tensor_statistics.neg_zero_count, tensor_statistics.pos_zero_count, tensor_statistics.nan_count,
        tensor_statistics.neg_inf_count, tensor_statistics.pos_inf_count, tensor_statistics.zero_count);
      tensors_read_stat.push_back(tensor_data_item);
      continue;
    }
    DebugServices::TensorStat tensor_statistics = debug_services_->GetTensorStatistics(result);
    TensorStatData tensor_data_item(
      tensor_statistics.data_size, tensor_statistics.dtype, tensor_statistics.shape, tensor_statistics.is_bool,
      tensor_statistics.max_value, tensor_statistics.min_value, tensor_statistics.avg_value, tensor_statistics.count,
      tensor_statistics.neg_zero_count, tensor_statistics.pos_zero_count, tensor_statistics.nan_count,
      tensor_statistics.neg_inf_count, tensor_statistics.pos_inf_count, tensor_statistics.zero_count);
    tensors_read_stat.push_back(tensor_data_item);
  }

  return tensors_read_stat;
}

@@ -117,6 +117,68 @@ struct tensor_data_t {
  std::vector<int64_t> shape;
};

struct TensorBaseData {
  TensorBaseData(uint64_t data_size, int dtype, const std::vector<int64_t> &shape)
      : data_size_(data_size), dtype_(dtype), shape_(shape) {}

  const uint64_t data_size() const { return data_size_; }
  const int dtype() const { return dtype_; }
  const std::vector<int64_t> &shape() const { return shape_; }
  uint64_t data_size_;
  int dtype_;
  std::vector<int64_t> shape_;
};

struct TensorStatData {
  TensorStatData(uint64_t data_size, int dtype, const std::vector<int64_t> &shape, bool is_bool, double max_value,
                 double min_value, double avg_value, int count, int neg_zero_count, int pos_zero_count, int nan_count,
                 int neg_inf_count, int pos_inf_count, int zero_count)
      : data_size_(data_size),
        dtype_(dtype),
        shape_(shape),
        is_bool_(is_bool),
        max_value_(max_value),
        min_value_(min_value),
        avg_value_(avg_value),
        count_(count),
        neg_zero_count_(neg_zero_count),
        pos_zero_count_(pos_zero_count),
        nan_count_(nan_count),
        neg_inf_count_(neg_inf_count),
        pos_inf_count_(pos_inf_count),
        zero_count_(zero_count) {}

  const uint64_t data_size() const { return data_size_; }
  const int dtype() const { return dtype_; }
  const std::vector<int64_t> &shape() const { return shape_; }
  const bool is_bool() const { return is_bool_; }
  const double max_value() const { return max_value_; }
  const double min_value() const { return min_value_; }
  const double avg_value() const { return avg_value_; }
  const int count() const { return count_; }
  const int neg_zero_count() const { return neg_zero_count_; }
  const int pos_zero_count() const { return pos_zero_count_; }
  const int nan_count() const { return nan_count_; }
  const int neg_inf_count() const { return neg_inf_count_; }
  const int pos_inf_count() const { return pos_inf_count_; }
  const int zero_count() const { return zero_count_; }

  uint64_t data_size_;
  int dtype_;
  std::vector<int64_t> shape_;
  bool is_bool_;
  double max_value_;
  double min_value_;
  double avg_value_;
  int count_;
  int neg_zero_count_;
  int pos_zero_count_;
  int nan_count_;
  int neg_inf_count_;
  int pos_inf_count_;
  int zero_count_;
};

class DbgServices {
 private:
  DebugServices *debug_services_;

@@ -141,8 +203,14 @@ class DbgServices {

  std::vector<watchpoint_hit_t> CheckWatchpoints(unsigned int iteration);

  std::vector<std::shared_ptr<TensorData>> ReadTensorsUtil(std::vector<tensor_info_t> info);

  std::vector<tensor_data_t> ReadTensors(std::vector<tensor_info_t> info);

  std::vector<TensorBaseData> ReadTensorsBase(std::vector<tensor_info_t> info);

  std::vector<TensorStatData> ReadTensorsStat(std::vector<tensor_info_t> info);

  std::string GetVersion();
};

@@ -27,6 +27,8 @@ PYBIND11_MODULE(_mindspore_offline_debug, m) {
    .def("RemoveWatchpoint", &DbgServices::RemoveWatchpoint)
    .def("CheckWatchpoints", &DbgServices::CheckWatchpoints)
    .def("ReadTensors", &DbgServices::ReadTensors)
    .def("ReadTensorsBase", &DbgServices::ReadTensorsBase)
    .def("ReadTensorsStat", &DbgServices::ReadTensorsStat)
    .def("GetVersion", &DbgServices::GetVersion);

  py::class_<parameter_t>(m, "parameter")

@@ -63,4 +65,28 @@ PYBIND11_MODULE(_mindspore_offline_debug, m) {
    .def("get_data_size", &tensor_data_t::get_data_size)
    .def("get_dtype", &tensor_data_t::get_dtype)
    .def("get_shape", &tensor_data_t::get_shape);

  py::class_<TensorBaseData>(m, "TensorBaseData")
    .def(py::init<uint64_t, int, std::vector<int64_t>>())
    .def("data_size", &TensorBaseData::data_size)
    .def("dtype", &TensorBaseData::dtype)
    .def("shape", &TensorBaseData::shape);

  py::class_<TensorStatData>(m, "TensorStatData")
    .def(
      py::init<uint64_t, int, std::vector<int64_t>, bool, double, double, double, int, int, int, int, int, int, int>())
    .def("data_size", &TensorStatData::data_size)
    .def("dtype", &TensorStatData::dtype)
    .def("shape", &TensorStatData::shape)
    .def("is_bool", &TensorStatData::is_bool)
    .def("max_value", &TensorStatData::max_value)
    .def("min_value", &TensorStatData::min_value)
    .def("avg_value", &TensorStatData::avg_value)
    .def("count", &TensorStatData::count)
    .def("neg_zero_count", &TensorStatData::neg_zero_count)
    .def("pos_zero_count", &TensorStatData::pos_zero_count)
    .def("nan_count", &TensorStatData::nan_count)
    .def("neg_inf_count", &TensorStatData::neg_inf_count)
    .def("pos_inf_count", &TensorStatData::pos_inf_count)
    .def("zero_count", &TensorStatData::zero_count);
}
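As a quick illustration (not part of this commit), the new bindings can be exercised directly from Python; dbg_services.py wraps them the same way. The argument values below are arbitrary placeholders mirroring the docstring examples elsewhere in this change.

# Hedged sketch: constructing the newly bound classes directly.
import mindspore._mindspore_offline_debug as cds

base = cds.TensorBaseData(4, 0, [2, 2])                            # py::init<uint64_t, int, std::vector<int64_t>>
stat = cds.TensorStatData(4, 0, [2, 2], False, 10.0, 0.0, 5.0,     # 14 arguments, in the same order
                          4, 0, 4, 0, 0, 0, 0)                     # as the py::init above
print(base.data_size(), base.dtype(), base.shape())
print(stat.is_bool(), stat.max_value(), stat.avg_value(), stat.zero_count())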

@@ -20,6 +20,7 @@
#include <memory>
#include <bitset>
#include <tuple>
#include <type_traits>
#include "debug/debugger/tensor_summary.h"

#ifdef OFFLINE_DBG_MODE
@ -92,39 +93,45 @@ double VarianceAndMeanCalculator::GetStandardDeviation() { return sqrt(GetVarian
|
|||
|
||||
template <typename T>
|
||||
TensorSummary<T>::TensorSummary(void *current_tensor_ptr, void *const previous_tensor_ptr, uint32_t num_elements)
|
||||
: current_tensor_ptr(reinterpret_cast<T *>(current_tensor_ptr)),
|
||||
prev_tensor_ptr(reinterpret_cast<T *>(previous_tensor_ptr)),
|
||||
num_elements(num_elements),
|
||||
min(std::numeric_limits<double>::max()),
|
||||
max(std::numeric_limits<double>::lowest()),
|
||||
inf_count(0),
|
||||
nan_count(0),
|
||||
zero_count(0),
|
||||
epsilon(1.0e-9),
|
||||
mean_sd_cal_enabled(false) {}
|
||||
: current_tensor_ptr_(reinterpret_cast<T *>(current_tensor_ptr)),
|
||||
prev_tensor_ptr_(reinterpret_cast<T *>(previous_tensor_ptr)),
|
||||
num_elements_(num_elements),
|
||||
min_(std::numeric_limits<double>::max()),
|
||||
max_(std::numeric_limits<double>::lowest()),
|
||||
avg_(0.0),
|
||||
is_bool_(false),
|
||||
neg_zero_count_(0),
|
||||
pos_zero_count_(0),
|
||||
pos_inf_count_(0),
|
||||
neg_inf_count_(0),
|
||||
inf_count_(0),
|
||||
nan_count_(0),
|
||||
zero_count_(0),
|
||||
epsilon_(1.0e-9),
|
||||
mean_sd_cal_enabled_(false) {}
|
||||
|
||||
template <typename T>
|
||||
void TensorSummary<T>::SummarizeTensor(const std::vector<DebugServices::watchpoint_t> &wps) {
|
||||
InitCalculators(wps);
|
||||
for (size_t i = 0; i < num_elements; ++i) {
|
||||
auto current_value = static_cast<double>(current_tensor_ptr[i]);
|
||||
for (size_t i = 0; i < num_elements_; ++i) {
|
||||
auto current_value = static_cast<double>(current_tensor_ptr_[i]);
|
||||
double previous_value =
|
||||
prev_tensor_ptr ? static_cast<double>(prev_tensor_ptr[i]) : std::numeric_limits<double>::quiet_NaN();
|
||||
inf_count += std::isinf(current_value);
|
||||
nan_count += std::isnan(current_value);
|
||||
zero_count += (current_value == 0);
|
||||
max = std::max(max, current_value);
|
||||
min = std::min(min, current_value);
|
||||
if (mean_sd_cal_enabled) {
|
||||
current_mean_variance.ProcessElement(current_value);
|
||||
prev_tensor_ptr_ ? static_cast<double>(prev_tensor_ptr_[i]) : std::numeric_limits<double>::quiet_NaN();
|
||||
inf_count_ += std::isinf(current_value);
|
||||
nan_count_ += std::isnan(current_value);
|
||||
zero_count_ += (current_value == 0);
|
||||
max_ = std::max(max_, current_value);
|
||||
min_ = std::min(min_, current_value);
|
||||
if (mean_sd_cal_enabled_) {
|
||||
current_mean_variance_.ProcessElement(current_value);
|
||||
}
|
||||
for (auto &it : all_close) {
|
||||
for (auto &it : all_close_) {
|
||||
it.second->ProcessElement(current_value, previous_value);
|
||||
}
|
||||
for (auto &range_count : range_counts) {
|
||||
for (auto &range_count : range_counts_) {
|
||||
range_count.second->ProcessElement(current_value);
|
||||
}
|
||||
for (auto &mean : means) {
|
||||
for (auto &mean : means_) {
|
||||
if (mean.first == "curr_prev_diff_mean") {
|
||||
mean.second->ProcessElement(std::abs(current_value - previous_value));
|
||||
} else if (mean.first == "abs_prev_mean") {
|
||||
|

@@ -136,6 +143,39 @@ void TensorSummary<T>::SummarizeTensor(const std::vector<DebugServices::watchpoi
  }
}

template <typename T>
void TensorSummary<T>::TensorStatistics(DbgDataType dtype_value) {
  if (dtype_value == DT_BOOL) {
    is_bool_ = true;
  }
  double sum_elements = 0.0;
  for (size_t i = 0; i < num_elements_; ++i) {
    auto current_value = static_cast<double>(current_tensor_ptr_[i]);
    if (std::isinf(current_value)) {
      if (current_value > 0) {
        pos_inf_count_ += 1;
      } else {
        neg_inf_count_ += 1;
      }
    }
    zero_count_ += (current_value == 0);
    nan_count_ += std::isnan(current_value);
    if (!(std::isnan(current_value) || std::isinf(current_value))) {
      // only considering tensor elements with value
      if (std::signbit(current_value) && !(current_value == 0)) {
        neg_zero_count_ += 1;
      } else if (!(current_value == 0)) {
        pos_zero_count_ += 1;
      }
      max_ = std::max(max_, current_value);
      min_ = std::min(min_, current_value);
      sum_elements += current_value;
    }
  }
  int value_count = zero_count_ + neg_zero_count_ + pos_zero_count_;
  avg_ = sum_elements / value_count;
}
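To make the semantics of the loop above explicit: min, max and the average are accumulated only over finite, non-NaN elements, while the inf/NaN/zero counters are updated per element. The following plain-Python paraphrase is an illustrative sketch only, not part of the commit.

# Plain-Python paraphrase of TensorSummary<T>::TensorStatistics (illustrative sketch).
import math

def tensor_statistics(values):
    stats = {"max": -math.inf, "min": math.inf, "nan_count": 0, "zero_count": 0,
             "pos_inf_count": 0, "neg_inf_count": 0, "neg_zero_count": 0, "pos_zero_count": 0}
    total = 0.0
    for v in map(float, values):
        if math.isinf(v):
            stats["pos_inf_count" if v > 0 else "neg_inf_count"] += 1
        stats["zero_count"] += (v == 0)
        stats["nan_count"] += math.isnan(v)
        if not (math.isnan(v) or math.isinf(v)):
            # only finite elements contribute to min/max/avg and the sign counts
            if v < 0:
                stats["neg_zero_count"] += 1
            elif v != 0:
                stats["pos_zero_count"] += 1
            stats["max"] = max(stats["max"], v)
            stats["min"] = min(stats["min"], v)
            total += v
    finite = stats["zero_count"] + stats["neg_zero_count"] + stats["pos_zero_count"]
    # the C++ code divides by value_count unconditionally; a guard is added here for the sketch
    stats["avg"] = total / finite if finite else float("nan")
    return stats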

template <typename T>
std::tuple<bool, int, std::vector<DebugServices::parameter_t>> TensorSummary<T>::IsWatchpointHit(
  DebugServices::watchpoint_t wp) {
@ -145,24 +185,24 @@ std::tuple<bool, int, std::vector<DebugServices::parameter_t>> TensorSummary<T>:
|
|||
std::bitset<bit_size> error_code;
|
||||
CONDITION_TYPE type = wp.condition.type;
|
||||
// bit 0 denotes presence of nan
|
||||
error_code.set(0, nan_count > 0);
|
||||
error_code.set(0, nan_count_ > 0);
|
||||
// bit 1 denotes presence of inf
|
||||
error_code.set(1, inf_count > 0);
|
||||
error_code.set(1, inf_count_ > 0);
|
||||
|
||||
if (type == CONDITION_TYPE::HAS_NAN) {
|
||||
error_code.reset();
|
||||
hit = nan_count > 0;
|
||||
hit = nan_count_ > 0;
|
||||
} else if (type == CONDITION_TYPE::HAS_INF) {
|
||||
error_code.reset();
|
||||
hit = inf_count > 0;
|
||||
hit = inf_count_ > 0;
|
||||
} else if (type == CONDITION_TYPE::GENERAL_OVERFLOW) {
|
||||
error_code.reset();
|
||||
hit = (nan_count + inf_count) > 0;
|
||||
} else if (type == CONDITION_TYPE::NOT_CHANGED && prev_tensor_ptr && error_code.none()) {
|
||||
hit = all_close[wp.id]->IsAllClose();
|
||||
hit = (nan_count_ + inf_count_) > 0;
|
||||
} else if (type == CONDITION_TYPE::NOT_CHANGED && prev_tensor_ptr_ && error_code.none()) {
|
||||
hit = all_close_[wp.id]->IsAllClose();
|
||||
} else if ((type == CONDITION_TYPE::NOT_CHANGED || type == CONDITION_TYPE::CHANGE_TOO_LARGE ||
|
||||
type == CONDITION_TYPE::CHANGE_TOO_SMALL) &&
|
||||
!prev_tensor_ptr) {
|
||||
!prev_tensor_ptr_) {
|
||||
// bit 2 denotes absence of previous tensor
|
||||
error_code.set(2, true);
|
||||
}
|
||||
|
@ -196,26 +236,26 @@ double_t TensorSummary<T>::StatLookup(const std::string ¶meter_name, const D
|
|||
}
|
||||
|
||||
if (param_type == "max") {
|
||||
return max;
|
||||
return max_;
|
||||
} else if (param_type == "min") {
|
||||
return min;
|
||||
return min_;
|
||||
} else if (param_type == "max_min") {
|
||||
return max - min;
|
||||
return max_ - min_;
|
||||
} else if (param_type == "mean") {
|
||||
return current_mean_variance.GetMean();
|
||||
return current_mean_variance_.GetMean();
|
||||
} else if (param_type == "sd") {
|
||||
return current_mean_variance.GetStandardDeviation();
|
||||
return current_mean_variance_.GetStandardDeviation();
|
||||
} else if (param_type == "abs_mean") {
|
||||
if (means.find("abs_current_mean") != means.end()) {
|
||||
return means["abs_current_mean"]->GetMean();
|
||||
if (means_.find("abs_current_mean") != means_.end()) {
|
||||
return means_["abs_current_mean"]->GetMean();
|
||||
}
|
||||
} else if (param_type == "abs_mean_update_ratio" && prev_tensor_ptr) {
|
||||
if (means.find("curr_prev_diff_mean") != means.end() && means.find("abs_prev_mean") != means.end()) {
|
||||
return means["curr_prev_diff_mean"]->GetMean() / (means["abs_prev_mean"]->GetMean() + epsilon);
|
||||
} else if (param_type == "abs_mean_update_ratio" && prev_tensor_ptr_) {
|
||||
if (means_.find("curr_prev_diff_mean") != means_.end() && means_.find("abs_prev_mean") != means_.end()) {
|
||||
return means_["curr_prev_diff_mean"]->GetMean() / (means_["abs_prev_mean"]->GetMean() + epsilon_);
|
||||
}
|
||||
} else if (param_type == "range_percentage") {
|
||||
if (range_counts.find(wp.id) != range_counts.end()) {
|
||||
return range_counts[wp.id]->GetPercentInRange();
|
||||
if (range_counts_.find(wp.id) != range_counts_.end()) {
|
||||
return range_counts_[wp.id]->GetPercentInRange();
|
||||
}
|
||||
} else if (param_type == "zero_percentage") {
|
||||
return GetZeroValPercent();
|
||||
|
@ -227,54 +267,54 @@ template <typename T>
|
|||
double_t TensorSummary<T>::StatLookup(const DebugServices::watchpoint_t &wp) {
|
||||
CONDITION_TYPE type = wp.condition.type;
|
||||
if (type == CONDITION_TYPE::MAX_LT || type == CONDITION_TYPE::MAX_GT) {
|
||||
return max;
|
||||
return max_;
|
||||
} else if (type == CONDITION_TYPE::MIN_LT || type == CONDITION_TYPE::MIN_GT) {
|
||||
return min;
|
||||
return min_;
|
||||
} else if (type == CONDITION_TYPE::MEAN_LT || type == CONDITION_TYPE::MEAN_GT) {
|
||||
return current_mean_variance.GetMean();
|
||||
return current_mean_variance_.GetMean();
|
||||
} else if (type == CONDITION_TYPE::SD_LT || type == CONDITION_TYPE::SD_GT) {
|
||||
return current_mean_variance.GetStandardDeviation();
|
||||
return current_mean_variance_.GetStandardDeviation();
|
||||
} else if (type == CONDITION_TYPE::MAX_MIN_GT || type == CONDITION_TYPE::MAX_MIN_LT) {
|
||||
return max - min;
|
||||
return max_ - min_;
|
||||
}
|
||||
return std::numeric_limits<double_t>::quiet_NaN();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
double_t TensorSummary<T>::GetZeroValPercent() {
|
||||
if (num_elements == 0) {
|
||||
if (num_elements_ == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return (zero_count * 100.0) / num_elements;
|
||||
return (zero_count_ * 100.0) / num_elements_;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void TensorSummary<T>::InitCalculators(const std::vector<DebugServices::watchpoint_t> &wps) {
|
||||
for (auto &wp : wps) {
|
||||
auto wp_id = wp.id;
|
||||
mean_sd_cal_enabled = mean_sd_cal_enabled || wp.mean_sd_enabled();
|
||||
if (wp.allclose_enabled() && prev_tensor_ptr) {
|
||||
all_close[wp_id] = std::make_unique<AllCloseCalculator>();
|
||||
mean_sd_cal_enabled_ = mean_sd_cal_enabled_ || wp.mean_sd_enabled();
|
||||
if (wp.allclose_enabled() && prev_tensor_ptr_) {
|
||||
all_close_[wp_id] = std::make_unique<AllCloseCalculator>();
|
||||
if (!wp.parameter_list[0].disabled) {
|
||||
all_close[wp_id]->set_atol(wp.parameter_list[0].value);
|
||||
all_close_[wp_id]->set_atol(wp.parameter_list[0].value);
|
||||
}
|
||||
if (!wp.parameter_list[1].disabled) {
|
||||
all_close[wp_id]->set_rtol(wp.parameter_list[1].value);
|
||||
all_close_[wp_id]->set_rtol(wp.parameter_list[1].value);
|
||||
}
|
||||
} else if (wp.range_enabled()) {
|
||||
range_counts[wp_id] = std::make_unique<RangeCountCalculator>();
|
||||
range_counts_[wp_id] = std::make_unique<RangeCountCalculator>();
|
||||
if (!wp.parameter_list[0].disabled) {
|
||||
range_counts[wp_id]->set_range_start_inclusive(wp.parameter_list[0].value);
|
||||
range_counts_[wp_id]->set_range_start_inclusive(wp.parameter_list[0].value);
|
||||
}
|
||||
if (!wp.parameter_list[1].disabled) {
|
||||
range_counts[wp_id]->set_range_end_inclusive(wp.parameter_list[1].value);
|
||||
range_counts_[wp_id]->set_range_end_inclusive(wp.parameter_list[1].value);
|
||||
}
|
||||
} else if (wp.tensor_update_ratio_mean_enabled() && prev_tensor_ptr) {
|
||||
means.insert({"curr_prev_diff_mean", std::make_unique<MeanCalculator>()});
|
||||
means.insert({"abs_prev_mean", std::make_unique<MeanCalculator>()});
|
||||
} else if (wp.tensor_update_ratio_mean_enabled() && prev_tensor_ptr_) {
|
||||
means_.insert({"curr_prev_diff_mean", std::make_unique<MeanCalculator>()});
|
||||
means_.insert({"abs_prev_mean", std::make_unique<MeanCalculator>()});
|
||||
} else if (wp.abs_mean_enabled()) {
|
||||
means.insert({"abs_current_mean", std::make_unique<MeanCalculator>()});
|
||||
means_.insert({"abs_current_mean", std::make_unique<MeanCalculator>()});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|

@@ -92,6 +92,18 @@ class ITensorSummary {
  virtual void SummarizeTensor(const std::vector<DebugServices::watchpoint_t> &) = 0;
  virtual std::tuple<bool, int32_t, std::vector<DebugServices::parameter_t>> IsWatchpointHit(
    DebugServices::watchpoint_t) = 0;
  virtual void TensorStatistics(DbgDataType) = 0;
  virtual const bool is_bool() const = 0;
  virtual const double max_value() const = 0;
  virtual const double min_value() const = 0;
  virtual const double avg_value() const = 0;
  virtual const int count() const = 0;
  virtual const int neg_zero_count() const = 0;
  virtual const int pos_zero_count() const = 0;
  virtual const int nan_count() const = 0;
  virtual const int neg_inf_count() const = 0;
  virtual const int pos_inf_count() const = 0;
  virtual const int zero_count() const = 0;
};

template <typename T>
@ -103,22 +115,40 @@ class TensorSummary : public ITensorSummary {
|
|||
void SummarizeTensor(const std::vector<DebugServices::watchpoint_t> &) override;
|
||||
// returns hit, error_code, parameter_list
|
||||
std::tuple<bool, int, std::vector<DebugServices::parameter_t>> IsWatchpointHit(DebugServices::watchpoint_t) override;
|
||||
void TensorStatistics(DbgDataType) override;
|
||||
const bool is_bool() const override { return is_bool_; }
|
||||
const double max_value() const override { return max_; }
|
||||
const double min_value() const override { return min_; }
|
||||
const double avg_value() const override { return avg_; }
|
||||
const int count() const override { return num_elements_; }
|
||||
const int neg_zero_count() const override { return neg_zero_count_; }
|
||||
const int pos_zero_count() const override { return pos_zero_count_; }
|
||||
const int nan_count() const override { return nan_count_; }
|
||||
const int neg_inf_count() const override { return neg_inf_count_; }
|
||||
const int pos_inf_count() const override { return pos_inf_count_; }
|
||||
const int zero_count() const override { return zero_count_; }
|
||||
|
||||
private:
|
||||
T *current_tensor_ptr;
|
||||
T *prev_tensor_ptr;
|
||||
uint32_t num_elements;
|
||||
double min;
|
||||
double max;
|
||||
uint32_t inf_count;
|
||||
uint32_t nan_count;
|
||||
uint32_t zero_count;
|
||||
double epsilon;
|
||||
bool mean_sd_cal_enabled;
|
||||
VarianceAndMeanCalculator current_mean_variance;
|
||||
std::unordered_map<std::string, std::unique_ptr<MeanCalculator>> means;
|
||||
std::unordered_map<uint32_t, std::unique_ptr<AllCloseCalculator>> all_close;
|
||||
std::unordered_map<uint32_t, std::unique_ptr<RangeCountCalculator>> range_counts;
|
||||
T *current_tensor_ptr_;
|
||||
T *prev_tensor_ptr_;
|
||||
uint32_t num_elements_;
|
||||
double min_;
|
||||
double max_;
|
||||
double avg_;
|
||||
bool is_bool_;
|
||||
uint32_t neg_zero_count_;
|
||||
uint32_t pos_zero_count_;
|
||||
uint32_t pos_inf_count_;
|
||||
uint32_t neg_inf_count_;
|
||||
uint32_t inf_count_;
|
||||
uint32_t nan_count_;
|
||||
uint32_t zero_count_;
|
||||
double epsilon_;
|
||||
bool mean_sd_cal_enabled_;
|
||||
VarianceAndMeanCalculator current_mean_variance_;
|
||||
std::unordered_map<std::string, std::unique_ptr<MeanCalculator>> means_;
|
||||
std::unordered_map<uint32_t, std::unique_ptr<AllCloseCalculator>> all_close_;
|
||||
std::unordered_map<uint32_t, std::unique_ptr<RangeCountCalculator>> range_counts_;
|
||||
double_t StatLookup(const DebugServices::watchpoint_t &);
|
||||
double_t StatLookup(const std::string &, const DebugServices::watchpoint_t &);
|
||||
double_t GetZeroValPercent();
|
||||
|
|
|

@@ -17,7 +17,10 @@ The module DbgServices provides offline debugger APIs.
"""

import mindspore._mindspore_offline_debug as cds
from mindspore.offline_debug.mi_validators import check_init, check_initialize, check_add_watchpoint, check_remove_watchpoint, check_check_watchpoints, check_read_tensors, check_initialize_done, check_tensor_info_init, check_tensor_data_init, check_watchpoint_hit_init, check_parameter_init
from mindspore.offline_debug.mi_validators import check_init, check_initialize, check_add_watchpoint,\
    check_remove_watchpoint, check_check_watchpoints, check_read_tensor_info, check_initialize_done, \
    check_tensor_info_init, check_tensor_data_init, check_tensor_base_data_init, check_tensor_stat_data_init,\
    check_watchpoint_hit_init, check_parameter_init
from mindspore.offline_debug.mi_validator_helpers import replace_minus_one
|
|||
return watchpoint_hit_list
|
||||
|
||||
@check_initialize_done
|
||||
@check_read_tensors
|
||||
@check_read_tensor_info
|
||||
def read_tensors(self, info):
|
||||
"""
|
||||
Returning tensor data object describing the tensor requested tensor.
|
||||
|
@ -277,6 +280,83 @@ class DbgServices():
|
|||
tensor_data_list_ret.append(tensor_data)
|
||||
return tensor_data_list_ret
|
||||
|
||||
@check_initialize_done
|
||||
@check_read_tensor_info
|
||||
def read_tensor_base(self, info):
|
||||
"""
|
||||
Returning tensor base data object describing the requested tensor.
|
||||
|
||||
Args:
|
||||
info (list): List of TensorInfo objects.
|
||||
|
||||
Returns:
|
||||
list, TensorBaseData list.
|
||||
|
||||
Examples:
|
||||
>>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
|
||||
>>> d = dbg_services.DbgServices(dump_file_path="dump_file_path",
|
||||
>>> verbose=True)
|
||||
>>> d_init = d.initialize(is_sync_mode=True)
|
||||
>>> tensor_base_data_list = d_init.read_tensor_base([dbg_services.TensorInfo(node_name="conv2.bias",
|
||||
>>> slot=0,
|
||||
>>> iteration=8,
|
||||
>>> rank_id=5,
|
||||
>>> root_graph_id=0,
|
||||
>>> is_output=True)])
|
||||
"""
|
||||
log("in Python ReadTensorsBase info ", info)
|
||||
info_list_inst = []
|
||||
for elem in info:
|
||||
log("in Python ReadTensorsBase info ", info)
|
||||
info_list_inst.append(elem.instance)
|
||||
tensor_base_data_list = self.dbg_instance.ReadTensorsBase(info_list_inst)
|
||||
tensor_base_data_list_ret = []
|
||||
for elem in tensor_base_data_list:
|
||||
tensor_base_data = TensorBaseData(elem.data_size(), elem.dtype(), elem.shape())
|
||||
tensor_base_data_list_ret.append(tensor_base_data)
|
||||
return tensor_base_data_list_ret
|
||||
|
||||
@check_initialize_done
|
||||
@check_read_tensor_info
|
||||
def read_tensor_stats(self, info):
|
||||
"""
|
||||
Returning tensor statistics object describing the requested tensor.
|
||||
|
||||
Args:
|
||||
info (list): List of TensorInfo objects.
|
||||
|
||||
Returns:
|
||||
list, TensorStatData list.
|
||||
|
||||
Examples:
|
||||
>>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
|
||||
>>> d = dbg_services.DbgServices(dump_file_path="dump_file_path",
|
||||
>>> verbose=True)
|
||||
>>> d_init = d.initialize(is_sync_mode=True)
|
||||
>>> tensor_stat_data_list = d_init.read_tensor_stats([dbg_services.TensorInfo(node_name="conv2.bias",
|
||||
>>> slot=0,
|
||||
>>> iteration=8,
|
||||
>>> rank_id=5,
|
||||
>>> root_graph_id=0,
|
||||
>>> is_output=True)])
|
||||
"""
|
||||
log("in Python ReadTensorsStat info ", info)
|
||||
info_list_inst = []
|
||||
for elem in info:
|
||||
log("in Python ReadTensorsStat info ", info)
|
||||
info_list_inst.append(elem.instance)
|
||||
tensor_stat_data_list = self.dbg_instance.ReadTensorsStat(info_list_inst)
|
||||
tensor_stat_data_list_ret = []
|
||||
for elem in tensor_stat_data_list:
|
||||
tensor_stat_data = TensorStatData(elem.data_size(), elem.dtype(),
|
||||
elem.shape(), elem.is_bool(),
|
||||
elem.max_value(), elem.min_value(),
|
||||
elem.avg_value(), elem.count(), elem.neg_zero_count(),
|
||||
elem.pos_zero_count(), elem.nan_count(), elem.neg_inf_count(),
|
||||
elem.pos_inf_count(), elem.zero_count())
|
||||
tensor_stat_data_list_ret.append(tensor_stat_data)
|
||||
return tensor_stat_data_list_ret
|
||||
|
||||
class TensorInfo():
|
||||
"""
|
||||
Tensor Information class.
|
||||
|
@ -527,6 +607,406 @@ class TensorData():
|
|||
|
||||
return self.instance.get_shape()
|
||||
|
||||
class TensorBaseData():
|
||||
|
||||
"""
|
||||
TensorBaseData class.
|
||||
|
||||
Args:
|
||||
data_size (int): Size of data in bytes.
|
||||
dtype (int): An encoding representing the type of TensorData.
|
||||
shape (list): Shape of tensor.
|
||||
|
||||
Examples:
|
||||
>>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
|
||||
>>> tensor_base_data = dbg_services.TensorBaseData(data_size=4,
|
||||
>>> dtype=0,
|
||||
>>> shape=[2, 2])
|
||||
"""
|
||||
@check_tensor_base_data_init
|
||||
def __init__(self, data_size, dtype, shape):
|
||||
self.instance = cds.TensorBaseData(data_size, dtype, shape)
|
||||
|
||||
@property
|
||||
def data_size(self):
|
||||
"""
|
||||
Function to receive TensorBaseData data_size.
|
||||
|
||||
Returns:
|
||||
int, data_size of TensorBaseData instance.
|
||||
|
||||
Examples:
|
||||
>>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
|
||||
>>> tensor_base_data = dbg_services.TensorBaseData(data_size=4,
|
||||
>>> dtype=0,
|
||||
>>> shape=[2, 2])
|
||||
>>> data_size = tensor_base_data.data_size
|
||||
"""
|
||||
|
||||
return self.instance.data_size()
|
||||
|
||||
@property
|
||||
def dtype(self):
|
||||
"""
|
||||
Function to receive TensorBaseData dtype.
|
||||
|
||||
Returns:
|
||||
int, dtype of TensorBaseData instance.
|
||||
|
||||
Examples:
|
||||
>>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
|
||||
>>> tensor_base_data = dbg_services.TensorBaseData(data_size=4,
|
||||
>>> dtype=0,
|
||||
>>> shape=[2, 2])
|
||||
>>> dtype = tensor_base_data.dtype
|
||||
"""
|
||||
|
||||
return self.instance.dtype()
|
||||
|
||||
@property
|
||||
def shape(self):
|
||||
"""
|
||||
Function to receive TensorBaseData shape.
|
||||
|
||||
Returns:
|
||||
list, shape of TensorBaseData instance.
|
||||
|
||||
Examples:
|
||||
>>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
|
||||
>>> tensor_base_data = dbg_services.TensorBaseData(data_size=4,
|
||||
>>> dtype=0,
|
||||
>>> shape=[2, 2])
|
||||
>>> shape = tensor_base_data.shape
|
||||
"""
|
||||
|
||||
return self.instance.shape()
|
||||
class TensorStatData():

    """
    TensorStatData class.

    Args:
        data_size (int): Size of data in bytes.
        dtype (int): An encoding representing the type of TensorData.
        shape (list): Shape of tensor.
        is_bool (bool): Whether the data type is bool.
        max_value (float): Maximum value in tensor's elements.
        min_value (float): Minimum value in tensor's elements.
        avg_value (float): Average value of all tensor's elements.
        count (int): Number of elements in tensor.
        neg_zero_count (int): Number of negative elements in tensor.
        pos_zero_count (int): Number of positive elements in tensor.
        nan_count (int): Number of nan elements in tensor.
        neg_inf_count (int): Number of negative infinity elements in tensor.
        pos_inf_count (int): Number of positive infinity elements in tensor.
        zero_count (int): Total number of zero elements in tensor.

    Examples:
        >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
        >>> tensor_stat_data = dbg_services.TensorStatData(data_size=4,
        >>>                                                dtype=0,
        >>>                                                shape=[2, 2], is_bool=False, max_value=10.0,
        >>>                                                min_value=0.0, avg_value=5.0,
        >>>                                                count=4, neg_zero_count=0, pos_zero_count=4, nan_count=0,
        >>>                                                neg_inf_count=0, pos_inf_count=0, zero_count=1)
    """
    @check_tensor_stat_data_init
    def __init__(self, data_size, dtype, shape, is_bool, max_value, min_value, avg_value, count,
                 neg_zero_count, pos_zero_count, nan_count, neg_inf_count, pos_inf_count, zero_count):
        self.instance = cds.TensorStatData(data_size, dtype, shape, is_bool, max_value,
                                           min_value, avg_value, count, neg_zero_count,
                                           pos_zero_count, nan_count, neg_inf_count,
                                           pos_inf_count, zero_count)
|
||||
|
||||
|
||||
@property
|
||||
def data_size(self):
|
||||
"""
|
||||
Function to receive TensorStatData data_size.
|
||||
|
||||
Returns:
|
||||
int, data_size of TensorStatData instance.
|
||||
|
||||
Examples:
|
||||
>>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
|
||||
>>> tensor_stat_data = dbg_services.TensorStatData(data_size=4,
>>> dtype=0,
>>> shape=[2, 2], is_bool = False, max_value = 10.0,
>>> min_value = 0.0, avg_value = 5.0,
>>> count = 4, neg_zero_count = 0, pos_zero_count = 4,
>>> nan_count = 0, neg_inf_count = 0, pos_inf_count = 0, zero_count = 1)
|
||||
>>> data_size = tensor_stat_data.data_size
|
||||
"""
|
||||
|
||||
return self.instance.data_size()
|
||||
|
||||
@property
|
||||
def dtype(self):
|
||||
"""
|
||||
Function to receive TensorStatData dtype.
|
||||
|
||||
Returns:
|
||||
int, dtype of TensorStatData instance.
|
||||
|
||||
Examples:
|
||||
>>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
|
||||
>>> tensor_stat_data = dbg_services.TensorStatData(data_size=4,
|
||||
>>> dtype=0,
|
||||
>>> shape=[2, 2], is_bool = False, max_value = 10.0,
>>> min_value = 0.0, avg_value = 5.0,
|
||||
>>> count = 4, neg_zero_count = 0, pos_zero_count = 4, nan_count = 0,
|
||||
>>> neg_inf_count, pos_inf_count, zero_count = 1)
|
||||
>>> dtype = tensor_stat_data.dtype
|
||||
"""
|
||||
|
||||
return self.instance.dtype()
|
||||
|
||||
@property
|
||||
def shape(self):
|
||||
"""
|
||||
Function to receive TensorStatData shape.
|
||||
|
||||
Returns:
|
||||
list, shape of TensorStatData instance.
|
||||
|
||||
Examples:
|
||||
>>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
|
||||
>>> tensor_stat_data = dbg_services.TensorStatData(data_size=4,
|
||||
>>> dtype=0,
|
||||
>>> shape=[2, 2], is_bool = false, max_value = 10.0,
|
||||
>>> min_value = 0.0, avg_value = 5.0,
|
||||
>>> count = 4, neg_zero_count = 0, pos_zero_count = 4, nan_count = 0,
|
||||
>>> neg_inf_count, pos_inf_count, zero_count = 1)
|
||||
>>> shape = tensor_stat_data.shape
|
||||
"""
|
||||
|
||||
return self.instance.shape()
|
||||
|
||||
@property
|
||||
def is_bool(self):
|
||||
"""
|
||||
Function to receive TensorStatData is_bool.
|
||||
|
||||
Returns:
|
||||
bool, Whether the tensor elements are bool.
|
||||
|
||||
Examples:
|
||||
>>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
|
||||
>>> tensor_stat_data = dbg_services.TensorStatData(data_size=4,
|
||||
>>> dtype=0,
|
||||
>>> shape=[2, 2], is_bool = false, max_value = 10.0,
|
||||
>>> min_value = 0.0, avg_value = 5.0,
|
||||
>>> count = 4, neg_zero_count = 0, pos_zero_count = 4, nan_count = 0,
|
||||
>>> neg_inf_count, pos_inf_count, zero_count = 1)
|
||||
>>> is_bool = tensor_stat_data.is_bool
|
||||
"""
|
||||
return self.instance.is_bool()
|
||||
|
||||
@property
|
||||
def max_value(self):
|
||||
"""
|
||||
Function to receive TensorStatData max_value.
|
||||
|
||||
Returns:
|
||||
float, max_value of TensorStatData instance.
|
||||
|
||||
Examples:
|
||||
>>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
|
||||
>>> tensor_stat_data = dbg_services.TensorStatData(data_size=4,
|
||||
>>> dtype=0,
|
||||
>>> shape=[2, 2], is_bool = false, max_value = 10.0,
|
||||
>>> min_value = 0.0, avg_value = 5.0,
|
||||
>>> count = 4, neg_zero_count = 0, pos_zero_count = 4, nan_count = 0,
|
||||
>>> neg_inf_count, pos_inf_count, zero_count = 1)
|
||||
>>> max_value = tensor_stat_data.max_value
|
||||
"""
|
||||
return self.instance.max_value()
|
||||
|
||||
@property
|
||||
def min_value(self):
|
||||
"""
|
||||
Function to receive TensorStatData min_value.
|
||||
|
||||
Returns:
|
||||
float, min_value of TensorStatData instance.
|
||||
|
||||
Examples:
|
||||
>>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
|
||||
>>> tensor_stat_data = dbg_services.TensorStatData(data_size=4,
|
||||
>>> dtype=0,
|
||||
>>> shape=[2, 2], is_bool = false, max_value = 10.0,
|
||||
>>> min_value = 0.0, avg_value = 5.0,
|
||||
>>> count = 4, neg_zero_count = 0, pos_zero_count = 4, nan_count = 0,
|
||||
>>> neg_inf_count, pos_inf_count, zero_count = 1)
|
||||
>>> min_value = tensor_stat_data.min_value
|
||||
"""
|
||||
return self.instance.min_value()
|
||||
|
||||
@property
|
||||
def avg_value(self):
|
||||
"""
|
||||
Function to receive TensorStatData avg_value.
|
||||
|
||||
Returns:
|
||||
float, avg_value of TensorStatData instance.
|
||||
|
||||
Examples:
|
||||
>>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
|
||||
>>> tensor_stat_data = dbg_services.TensorStatData(data_size=4,
|
||||
>>> dtype=0,
|
||||
>>> shape=[2, 2], is_bool = false, max_value = 10.0,
|
||||
>>> min_value = 0.0, avg_value = 5.0,
|
||||
>>> count = 4, neg_zero_count = 0, pos_zero_count = 4, nan_count = 0,
|
||||
>>> neg_inf_count, pos_inf_count, zero_count = 1)
|
||||
>>> avg_value = tensor_stat_data.avg_value
|
||||
"""
|
||||
return self.instance.avg_value()
|
||||
|
||||
@property
|
||||
def count(self):
|
||||
"""
|
||||
Function to receive TensorStatData count.
|
||||
|
||||
Returns:
|
||||
int, count of TensorStatData instance.
|
||||
|
||||
Examples:
|
||||
>>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
|
||||
>>> tensor_stat_data = dbg_services.TensorStatData(data_size=4,
|
||||
>>> dtype=0,
|
||||
>>> shape=[2, 2], is_bool = false, max_value = 10.0,
|
||||
>>> min_value = 0.0, avg_value = 5.0,
|
||||
>>> count = 4, neg_zero_count = 0, pos_zero_count = 4, nan_count = 0,
|
||||
>>> neg_inf_count, pos_inf_count, zero_count = 1)
|
||||
>>> count = tensor_stat_data.count
|
||||
"""
|
||||
return self.instance.count()
|
||||
|
||||
@property
|
||||
def neg_zero_count(self):
|
||||
"""
|
||||
Function to receive TensorStatData neg_zero_count.
|
||||
|
||||
Returns:
|
||||
int, neg_zero_count of TensorStatData instance.
|
||||
|
||||
Examples:
|
||||
>>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
|
||||
>>> tensor_stat_data = dbg_services.TensorStatData(data_size=4,
|
||||
>>> dtype=0,
|
||||
>>> shape=[2, 2], is_bool = false, max_value = 10.0,
|
||||
>>> min_value = 0.0, avg_value = 5.0,
|
||||
>>> count = 4, neg_zero_count = 0, pos_zero_count = 4, nan_count = 0,
|
||||
>>> neg_inf_count, pos_inf_count, zero_count = 1)
|
||||
>>> neg_zero_count = tensor_stat_data.neg_zero_count
|
||||
"""
|
||||
return self.instance.neg_zero_count()
|
||||
|
||||
@property
|
||||
def pos_zero_count(self):
|
||||
"""
|
||||
Function to receive TensorStatData pos_zero_count.
|
||||
|
||||
Returns:
|
||||
int, pos_zero_count of TensorStatData instance.
|
||||
|
||||
Examples:
|
||||
>>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
|
||||
>>> tensor_stat_data = dbg_services.TensorStatData(data_size=4,
|
||||
>>> dtype=0,
|
||||
>>> shape=[2, 2], is_bool = false, max_value = 10.0,
|
||||
>>> min_value = 0.0, avg_value = 5.0,
|
||||
>>> count = 4, neg_zero_count = 0, pos_zero_count = 4, nan_count = 0,
|
||||
>>> neg_inf_count, pos_inf_count, zero_count = 1)
|
||||
>>> pos_zero_count = tensor_stat_data.pos_zero_count
|
||||
"""
|
||||
return self.instance.pos_zero_count()
|
||||
|
||||
@property
|
||||
def zero_count(self):
|
||||
"""
|
||||
Function to receive TensorStatData zero_count.
|
||||
|
||||
Returns:
|
||||
int, zero_count of TensorStatData instance.
|
||||
|
||||
Examples:
|
||||
>>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
|
||||
>>> tensor_stat_data = dbg_services.TensorStatData(data_size=4,
|
||||
>>> dtype=0,
|
||||
>>> shape=[2, 2], is_bool = false, max_value = 10.0,
|
||||
>>> min_value = 0.0, avg_value = 5.0,
|
||||
>>> count = 4, neg_zero_count = 0, pos_zero_count = 4, nan_count = 0,
|
||||
>>> neg_inf_count, pos_inf_count, zero_count = 1)
|
||||
>>> zero_count = tensor_stat_data.zero_count
|
||||
"""
|
||||
return self.instance.zero_count()
|
||||
|
||||
@property
|
||||
def nan_count(self):
|
||||
"""
|
||||
Function to receive TensorStatData nan_count.
|
||||
|
||||
Returns:
|
||||
int, nan_count of TensorStatData instance.
|
||||
|
||||
Examples:
|
||||
>>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
|
||||
>>> tensor_stat_data = dbg_services.TensorStatData(data_size=4,
|
||||
>>> dtype=0,
|
||||
>>> shape=[2, 2], is_bool = false, max_value = 10.0,
|
||||
>>> min_value = 0.0, avg_value = 5.0,
|
||||
>>> count = 4, neg_zero_count = 0, pos_zero_count = 4, nan_count = 0,
|
||||
>>> neg_inf_count, pos_inf_count, zero_count = 1)
|
||||
>>> nan_count = tensor_stat_data.nan_count
|
||||
"""
|
||||
return self.instance.nan_count()
|
||||
|
||||
@property
|
||||
def neg_inf_count(self):
|
||||
"""
|
||||
Function to receive TensorStatData neg_inf_count.
|
||||
|
||||
Returns:
|
||||
int, neg_inf_count of TensorStatData instance.
|
||||
|
||||
Examples:
|
||||
>>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
|
||||
>>> tensor_stat_data = dbg_services.TensorStatData(data_size=4,
|
||||
>>> dtype=0,
|
||||
>>> shape=[2, 2], is_bool = false, max_value = 10.0,
|
||||
>>> min_value = 0.0, avg_value = 5.0,
|
||||
>>> count = 4, neg_zero_count = 0, pos_zero_count = 4, nan_count = 0,
|
||||
>>> neg_inf_count, pos_inf_count, zero_count = 1)
|
||||
>>> neg_inf_count = tensor_stat_data.neg_inf_count
|
||||
"""
|
||||
return self.instance.neg_inf_count()
|
||||
|
||||
@property
|
||||
def pos_inf_count(self):
|
||||
"""
|
||||
Function to receive TensorStatData pos_inf_count.
|
||||
|
||||
Returns:
|
||||
int, pos_inf_count of TensorStatData instance.
|
||||
|
||||
Examples:
|
||||
>>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
|
||||
>>> tensor_stat_data = dbg_services.TensorStatData(data_size=4,
|
||||
>>> dtype=0,
|
||||
>>> shape=[2, 2], is_bool = false, max_value = 10.0,
|
||||
>>> min_value = 0.0, avg_value = 5.0,
|
||||
>>> count = 4, neg_zero_count = 0, pos_zero_count = 1, nan_count = 0,
|
||||
>>> neg_inf_count, pos_inf_count, zero_count = 1)
|
||||
>>> pos_inf_count = tensor_stat_data.pos_inf_count
|
||||
"""
|
||||
return self.instance.pos_inf_count()
|
||||
|
||||
class WatchpointHit():
|
||||
"""
|
||||
WatchpointHit class.
|
||||
|
@@ -583,7 +1063,7 @@ class WatchpointHit():
            >>> name = watchpoint_hit.name
        """
-        return self.instance.get_name()
+        return self.instance.name()

    @property
    def slot(self):

@@ -606,7 +1086,7 @@ class WatchpointHit():
            >>> slot = watchpoint_hit.slot
        """
-        return self.instance.get_slot()
+        return self.instance.slot()

    @property
    def condition(self):

@@ -629,7 +1109,7 @@ class WatchpointHit():
            >>> condition = watchpoint_hit.condition
        """
-        return self.instance.get_condition()
+        return self.instance.condition()

    @property
    def watchpoint_id(self):

@@ -652,7 +1132,7 @@ class WatchpointHit():
            >>> watchpoint_id = watchpoint_hit.watchpoint_id
        """
-        return self.instance.get_watchpoint_id()
+        return self.instance.watchpoint_id()

    @property
    def parameters(self):

@@ -675,7 +1155,7 @@ class WatchpointHit():
            >>> parameters = watchpoint_hit.parameters
        """
-        params = self.instance.get_parameters()
+        params = self.instance.parameters()
        param_list = []
        for elem in params:
            tmp = Parameter(elem.get_name(),

@@ -121,7 +121,7 @@ def check_check_watchpoints(method):
    return new_method


-def check_read_tensors(method):
+def check_read_tensor_info(method):
    """Wrapper method to check the parameters of DbgServices ReadTensors."""

    @wraps(method)
@@ -189,6 +189,52 @@ def check_tensor_data_init(method):

    return new_method


def check_tensor_base_data_init(method):
    """Wrapper method to check the parameters of DbgServices TensorBaseData init."""

    @wraps(method)
    def new_method(self, *args, **kwargs):
        [data_size, dtype, shape], _ = parse_user_args(method, *args, **kwargs)

        check_uint64(data_size, "data_size")
        type_check(dtype, (int,), "dtype")
        shape_names = ["shape_{0}".format(i) for i in range(len(shape))]
        type_check_list(shape, (int,), shape_names)

        return method(self, *args, **kwargs)

    return new_method


def check_tensor_stat_data_init(method):
    """Wrapper method to check the parameters of DbgServices TensorStatData init."""

    @wraps(method)
    def new_method(self, *args, **kwargs):
        [data_size, dtype, shape, is_bool, max_value, min_value,
         avg_value, count, neg_zero_count, pos_zero_count,
         nan_count, neg_inf_count, pos_inf_count,
         zero_count], _ = parse_user_args(method, *args, **kwargs)

        check_uint64(data_size, "data_size")
        type_check(dtype, (int,), "dtype")
        shape_names = ["shape_{0}".format(i) for i in range(len(shape))]
        type_check_list(shape, (int,), shape_names)
        type_check(is_bool, (bool,), "is_bool")
        type_check(max_value, (float,), "max_value")
        type_check(min_value, (float,), "min_value")
        type_check(avg_value, (float,), "avg_value")
        type_check(count, (int,), "count")
        type_check(neg_zero_count, (int,), "neg_zero_count")
        type_check(pos_zero_count, (int,), "pos_zero_count")
        type_check(nan_count, (int,), "nan_count")
        type_check(neg_inf_count, (int,), "neg_inf_count")
        type_check(pos_inf_count, (int,), "pos_inf_count")
        type_check(zero_count, (int,), "zero_count")

        return method(self, *args, **kwargs)

    return new_method


def check_watchpoint_hit_init(method):
    """Wrapper method to check the parameters of DbgServices WatchpointHit init."""

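A self-contained sketch of the same guard pattern, shown only for illustration: it uses plain isinstance checks in place of the project's parse_user_args/check_uint64/type_check/type_check_list helpers, and the decorator name validate_stat_init_sketch is hypothetical.

from functools import wraps


def validate_stat_init_sketch(method):
    """Reject mistyped arguments before the wrapped __init__ runs."""
    @wraps(method)
    def new_method(self, data_size, dtype, shape, is_bool, max_value, min_value,
                   avg_value, count, neg_zero_count, pos_zero_count,
                   nan_count, neg_inf_count, pos_inf_count, zero_count):
        # data_size must be a non-negative integer, dtype an int enum value.
        if not isinstance(data_size, int) or data_size < 0:
            raise TypeError("data_size must be an unsigned integer.")
        if not isinstance(dtype, int):
            raise TypeError("dtype must be an int.")
        # Every shape dimension must be an int; the float statistics must be floats.
        if not all(isinstance(dim, int) for dim in shape):
            raise TypeError("every element of shape must be an int.")
        if not isinstance(is_bool, bool):
            raise TypeError("is_bool must be a bool.")
        for name, value in (("max_value", max_value), ("min_value", min_value),
                            ("avg_value", avg_value)):
            if not isinstance(value, float):
                raise TypeError(name + " must be a float.")
        # All element counters must be ints.
        for name, value in (("count", count), ("neg_zero_count", neg_zero_count),
                            ("pos_zero_count", pos_zero_count), ("nan_count", nan_count),
                            ("neg_inf_count", neg_inf_count), ("pos_inf_count", pos_inf_count),
                            ("zero_count", zero_count)):
            if not isinstance(value, int):
                raise TypeError(name + " must be an int.")
        return method(self, data_size, dtype, shape, is_bool, max_value, min_value,
                      avg_value, count, neg_zero_count, pos_zero_count,
                      nan_count, neg_inf_count, pos_inf_count, zero_count)
    return new_method
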
@@ -0,0 +1,87 @@
-----------------------------------------------------------
tensor_info_1 attributes:
node name = Default/Add-op4
slot = 0
iteration = 0
rank_id = 0
root_graph_id = 0
is_output = True

tensor_base_info:
size in bytes = 24
debugger dtype = 11
shape = [2, 3]

tensor_stat_info:
size in bytes = 24
debugger dtype = 11
shape = [2, 3]
is_bool = False
max_value = 10.0
min_value = -11.0
avg_value = 0.880000114440918
count = 6
neg_zero_count = 2
pos_zero_count = 3
nan_count = 0
neg_inf_count = 0
pos_inf_count = 0
zero_count = 1
-----------------------------------------------------------
tensor_info_2 attributes:
node name = Default/Reciprocal-op3
slot = 0
iteration = 0
rank_id = 0
root_graph_id = 0
is_output = True

tensor_base_info:
size in bytes = 40
debugger dtype = 11
shape = [2, 5]

tensor_stat_info:
size in bytes = 40
debugger dtype = 11
shape = [2, 5]
is_bool = False
max_value = 1.0
min_value = 1.0
avg_value = 1.0
count = 10
neg_zero_count = 0
pos_zero_count = 2
nan_count = 0
neg_inf_count = 3
pos_inf_count = 5
zero_count = 0
-----------------------------------------------------------
tensor_info_3 attributes:
node name = Default/network-WithLossCell/_backbone-MockModel/ReduceMean-op92
slot = 0
iteration = 0
rank_id = 0
root_graph_id = 0
is_output = True

tensor_base_info:
size in bytes = 20
debugger dtype = 11
shape = [5]

tensor_stat_info:
size in bytes = 20
debugger dtype = 11
shape = [5]
is_bool = False
max_value = 1.9901361465454102
min_value = -2.175431728363037
avg_value = -0.6648297309875488
count = 5
neg_zero_count = 2
pos_zero_count = 1
nan_count = 2
neg_inf_count = 0
pos_inf_count = 0
zero_count = 0

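A quick cross-check of the golden values above (NumPy only, independent of the debugger API): the Inf and NaN counters can be reproduced directly from the arrays the test script below writes into the dump.

import numpy as np

inf_tensor = np.array([[1., -np.inf, np.inf, -np.inf, np.inf],
                       [np.inf, 1., -np.inf, np.inf, np.inf]], np.float32)
nan_tensor = np.array([-2.1754317, 1.9901361, np.nan, np.nan, -1.8091936], np.float32)

print(int(np.isneginf(inf_tensor).sum()))  # 3, matching neg_inf_count for tensor_info_2
print(int(np.isposinf(inf_tensor).sum()))  # 5, matching pos_inf_count for tensor_info_2
print(int(np.isnan(nan_tensor).sum()))     # 2, matching nan_count for tensor_info_3
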
@@ -0,0 +1,146 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Read tensor base and statistics test script for offline debugger APIs.
"""

import tempfile
import os
import shutil
import numpy as np
import mindspore.offline_debug.dbg_services as d
from dump_test_utils import compare_actual_with_expected

GENERATE_GOLDEN = False
test_name = "sync_read_tensors_base_stat"


def test_sync_read_tensors_base_stat():

    value_tensor = np.array([[7.5, 8.56, -9.78], [10.0, -11.0, 0.0]], np.float32)
    inf_tensor = np.array([[1., -np.inf, np.inf, -np.inf, np.inf], [np.inf, 1., -np.inf, np.inf, np.inf]], np.float32)
    nan_tensor = np.array([-2.1754317, 1.9901361, np.nan, np.nan, -1.8091936], np.float32)

    value_path = build_dump_file_structure(value_tensor, "Add", "Add.Add-op4.0.0.")
    inf_path = build_dump_file_structure(inf_tensor, "Inf", "Reciprocal.Reciprocal-op3.0.0.")
    nan_path = build_dump_file_structure(nan_tensor, "Nan", "ReduceMean.ReduceMean-op92.0.0.")

    debugger_backend = d.DbgServices(
        dump_file_path=value_path, verbose=True)

    _ = debugger_backend.initialize(
        net_name="Add", is_sync_mode=True)

    debugger_backend_2 = d.DbgServices(
        dump_file_path=inf_path, verbose=True)

    _ = debugger_backend_2.initialize(
        net_name="Inf", is_sync_mode=True)

    debugger_backend_3 = d.DbgServices(
        dump_file_path=nan_path, verbose=True)

    _ = debugger_backend_3.initialize(
        net_name="Nan", is_sync_mode=True)

    info1 = d.TensorInfo(node_name="Default/Add-op4",
                         slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=True)
    info2 = d.TensorInfo(node_name="Default/Reciprocal-op3",
                         slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=True)
    info3 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-MockModel/ReduceMean-op92",
                         slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=True)

    tensor_info_1 = [info1]
    tensor_info_2 = [info2]
    tensor_info_3 = [info3]
    tensor_info = [info1, info2, info3]

    tensor_base_data_list = debugger_backend.read_tensor_base(tensor_info_1)
    tensor_base_data_list_2 = debugger_backend_2.read_tensor_base(tensor_info_2)
    tensor_base_data_list.extend(tensor_base_data_list_2)
    tensor_base_data_list_3 = debugger_backend_3.read_tensor_base(tensor_info_3)
    tensor_base_data_list.extend(tensor_base_data_list_3)

    tensor_stat_data_list = debugger_backend.read_tensor_stats(tensor_info_1)
    tensor_stat_data_list_2 = debugger_backend_2.read_tensor_stats(tensor_info_2)
    tensor_stat_data_list.extend(tensor_stat_data_list_2)
    tensor_stat_data_list_3 = debugger_backend_3.read_tensor_stats(tensor_info_3)
    tensor_stat_data_list.extend(tensor_stat_data_list_3)

    shutil.rmtree(value_path)
    shutil.rmtree(inf_path)
    shutil.rmtree(nan_path)
    print_read_tensors(tensor_info, tensor_base_data_list, tensor_stat_data_list)
    assert compare_actual_with_expected(test_name)


def build_dump_file_structure(tensor_array, net_name, tensor_name):
    debugger_temp_dir = tempfile.mkdtemp(prefix=net_name, dir="./")
    print(debugger_temp_dir)
    path = os.path.join(debugger_temp_dir, "rank_0", net_name, "0", "0")
    print(path)
    os.makedirs(path, exist_ok=True)
    file = tempfile.mkstemp(prefix=tensor_name, suffix=".output.0.DefaultFormat.npy", dir=path)
    full_path = file[1]
    np.save(full_path, tensor_array)

    return debugger_temp_dir


def print_read_tensors(tensor_info, tensor_base_data_list, tensor_stat_data_list):
    """Print read tensors info."""
    if GENERATE_GOLDEN:
        f_write = open(test_name + ".expected", "w")
    else:
        f_write = open(test_name + ".actual", "w")

    for x, _ in enumerate(tensor_info):
        f_write.write(
            "-----------------------------------------------------------\n")
        f_write.write("tensor_info_" + str(x+1) + " attributes:\n")
        f_write.write("node name = " + tensor_info[x].node_name + "\n")
        f_write.write("slot = " + str(tensor_info[x].slot) + "\n")
        f_write.write("iteration = " + str(tensor_info[x].iteration) + "\n")
        f_write.write("rank_id = " + str(tensor_info[x].rank_id) + "\n")
        f_write.write("root_graph_id = " +
                      str(tensor_info[x].root_graph_id) + "\n")
        f_write.write("is_output = " +
                      str(tensor_info[x].is_output) + "\n")
        f_write.write("\n")
        f_write.write("tensor_base_info:\n")
        f_write.write("size in bytes = " +
                      str(tensor_base_data_list[x].data_size) + "\n")
        f_write.write("debugger dtype = " + str(tensor_base_data_list[x].dtype) + "\n")
        f_write.write("shape = " + str(tensor_base_data_list[x].shape) + "\n")

        f_write.write("\n")
        f_write.write("tensor_stat_info:\n")

        f_write.write("size in bytes = " +
                      str(tensor_stat_data_list[x].data_size) + "\n")
        f_write.write("debugger dtype = " + str(tensor_stat_data_list[x].dtype) + "\n")
        f_write.write("shape = " + str(tensor_stat_data_list[x].shape) + "\n")
        f_write.write("is_bool = " + str(tensor_stat_data_list[x].is_bool) + "\n")
        f_write.write("max_value = " + str(tensor_stat_data_list[x].max_value) + "\n")
        f_write.write("min_value = " + str(tensor_stat_data_list[x].min_value) + "\n")
        f_write.write("avg_value = " + str(tensor_stat_data_list[x].avg_value) + "\n")
        f_write.write("count = " + str(tensor_stat_data_list[x].count) + "\n")
        f_write.write("neg_zero_count = " + str(tensor_stat_data_list[x].neg_zero_count) + "\n")
        f_write.write("pos_zero_count = " + str(tensor_stat_data_list[x].pos_zero_count) + "\n")
        f_write.write("nan_count = " + str(tensor_stat_data_list[x].nan_count) + "\n")
        f_write.write("neg_inf_count = " + str(tensor_stat_data_list[x].neg_inf_count) + "\n")
        f_write.write("pos_inf_count = " + str(tensor_stat_data_list[x].pos_inf_count) + "\n")
        f_write.write("zero_count = " + str(tensor_stat_data_list[x].zero_count) + "\n")
    f_write.close()
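# Hypothetical standalone entry point (not in the original test file); the test is
# normally collected by pytest, but this allows running the script directly when
# mindspore.offline_debug and dump_test_utils are importable.
if __name__ == "__main__":
    test_sync_read_tensors_base_stat()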