forked from mindspore-Ecosystem/mindspore

!21389 Add Tensor Base and Stat info to offline debugger

Merge pull request !21389 from parastooashtari/tensor_info_levels

This commit is contained in commit 2edaba38bf.
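For orientation, the two new offline-debugger entry points added by this change are meant to be used roughly as follows. This is a minimal sketch assembled from the docstring examples further down in the diff; the dump path, node name and iteration values are placeholders, and it assumes TensorStatData exposes its remaining statistics as properties in the same style as data_size/dtype/shape (only part of that class is visible in this excerpt).

    from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services

    d = dbg_services.DbgServices(dump_file_path="dump_file_path", verbose=True)
    d_init = d.initialize(is_sync_mode=True)
    info = dbg_services.TensorInfo(node_name="conv2.bias", slot=0, iteration=8,
                                   rank_id=5, root_graph_id=0, is_output=True)
    base_list = d_init.read_tensor_base([info])   # TensorBaseData: data_size, dtype, shape
    stat_list = d_init.read_tensor_stats([info])  # TensorStatData: adds min/max/avg and element counters
    print(base_list[0].data_size, base_list[0].dtype, base_list[0].shape)
    print(stat_list[0].max_value, stat_list[0].min_value, stat_list[0].avg_value)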
@@ -131,6 +131,30 @@ std::unique_ptr<ITensorSummary> GetSummaryPtr(const std::shared_ptr<TensorData>
   }
 }

+DebugServices::TensorStat DebugServices::GetTensorStatistics(const std::shared_ptr<TensorData> &tensor) {
+  if (tensor == nullptr) {
+    MS_LOG(WARNING) << "Tensor is nullptr, returning empty tensor statistics.";
+    TensorStat empty_tensor_stat_data;
+    return empty_tensor_stat_data;
+  }
+  std::unique_ptr<ITensorSummary> base_summary_ptr;
+  void *previous_tensor_ptr = nullptr;
+  base_summary_ptr = GetSummaryPtr(tensor, previous_tensor_ptr, tensor->GetNumElements(), tensor->GetType());
+  if (base_summary_ptr == nullptr) {
+    MS_LOG(WARNING) << "base_summary_ptr is nullptr, returning empty tensor statistics.";
+    TensorStat empty_tensor_stat_data;
+    return empty_tensor_stat_data;
+  }
+  base_summary_ptr->TensorStatistics(tensor->GetType());
+  TensorStat tensor_stat_data(tensor->GetByteSize(), tensor->GetType(), tensor->GetShape(), base_summary_ptr->is_bool(),
+                              base_summary_ptr->max_value(), base_summary_ptr->min_value(),
+                              base_summary_ptr->avg_value(), base_summary_ptr->count(),
+                              base_summary_ptr->neg_zero_count(), base_summary_ptr->pos_zero_count(),
+                              base_summary_ptr->nan_count(), base_summary_ptr->neg_inf_count(),
+                              base_summary_ptr->pos_inf_count(), base_summary_ptr->zero_count());
+
+  return tensor_stat_data;
+}
 #ifdef OFFLINE_DBG_MODE
 void *DebugServices::GetPrevTensor(const std::shared_ptr<TensorData> &tensor, bool previous_iter_tensor_needed) {
   void *previous_tensor_ptr = nullptr;

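GetTensorStatistics deliberately returns a default-constructed TensorStat when the tensor or its summary cannot be produced, rather than failing. On the Python side that surfaces as an all-default TensorStatData with data_size == 0 (see ReadTensorsStat later in this diff). A consumer-side check might look like this sketch, reusing d_init and info from the example above:

    stat_list = d_init.read_tensor_stats([info])
    for idx, stat in enumerate(stat_list):
        if stat.data_size == 0:
            print("no dump data found for requested tensor #%d" % idx)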
@@ -317,7 +341,11 @@ void DebugServices::CheckWatchpoints(std::vector<std::string> *const name, std:
   MS_LOG(INFO) << "tensor list size: " << tensor_list_size;
   if (tensor_list_size == 0) return;
   // default value for number of threads
-  const int max_thread_num = 32;
+  const int default_thread_num = 32;
+  int max_thread_num = default_thread_num;
+  if (max_thread_num > tensor_list_size) {
+    max_thread_num = tensor_list_size;
+  }
   MS_LOG(INFO) << "Number of threads used for checkwatchpoint: " << max_thread_num;
   int chunk_size = tensor_list_size / max_thread_num;
   int remainder = tensor_list_size % max_thread_num;

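The hunk above caps the number of watchpoint-checking threads at 32 and never uses more threads than there are tensors (the early return already handles an empty list). Below is a small sketch of the same partitioning arithmetic; spreading the remainder one tensor per thread is just one plausible way to consume chunk_size and remainder, since the hunk itself only computes them.

    def partition(tensor_list_size, default_thread_num=32):
        # the C++ code returns early when tensor_list_size == 0, so size >= 1 here
        max_thread_num = min(default_thread_num, tensor_list_size)
        chunk_size = tensor_list_size // max_thread_num
        remainder = tensor_list_size % max_thread_num
        # give the first `remainder` threads one extra tensor each
        return [chunk_size + (1 if i < remainder else 0) for i in range(max_thread_num)]

    print(partition(70))  # 32 entries that sum to 70: six chunks of 3, twenty-six of 2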
@@ -757,78 +785,100 @@ void DebugServices::ReadDumpedTensor(std::vector<std::string> backend_name, std:
                                      std::to_string(root_graph_id[i]) + "/" + IterationString(iteration[i]);

     // search files in dir for the one that meets the filename prefix and read the file into memory
-    std::vector<char> *buffer = NULL;
-    std::string type_name = "";
-    std::vector<int64_t> shape;
-    uint64_t data_size = 0;
     if (is_sync_mode_) {
-      std::string abspath = RealPath(specific_dump_dir);
-      DIR *d = opendir(abspath.c_str());
-      bool found_file = false;
-      std::vector<std::string> matched_paths;
-      if (d == nullptr) {
-        MS_LOG(ERROR) << "Directory " << specific_dump_dir << " does not exist!";
-      } else {
-        struct dirent *dir = nullptr;
-        while ((dir = readdir(d)) != NULL) {
-          if (dir->d_type == DT_REG) {
-            std::string file_name = dir->d_name;
-            std::string stripped_file_name = GetStrippedFilename(file_name);
-            if (stripped_file_name.empty()) {
-              continue;
-            }
-            std::size_t found = stripped_file_name.rfind(prefix_dump_file_name, 0);
-            if (found != 0) {
-              continue;
-            }
-
-            std::string full_path = specific_dump_dir + "/" + file_name;
-            matched_paths.push_back(full_path);
-            found_file = true;
-          }
-        }
-        (void)closedir(d);
-      }
-
-      if (found_file) {
-        shape.clear();
-        std::string result_path = GetNewestFilePath(matched_paths);
-        ReadTensorFromNpy(result_path, &type_name, &data_size, &shape, &buffer);
-        AddToTensorData(backend_name[i], slot[i], iteration[i], device_id[i], root_graph_id[i], is_output[i], data_size,
-                        type_name, shape, buffer, result_list);
-      } else {
-        AddToTensorData(backend_name[i], slot[i], iteration[i], device_id[i], root_graph_id[i], is_output[i], 0,
-                        type_name, shape, buffer, result_list);
-        MS_LOG(INFO) << "Target tensor has not been found.";
-      }
+      ReadDumpedTensorSync(prefix_dump_file_name, specific_dump_dir, backend_name[i], slot[i], device_id[i],
+                           iteration[i], root_graph_id[i], is_output[i], result_list);
     } else {
-      bool found = false;
-      std::vector<std::string> matched_paths;
-      // if async mode
-      for (const std::string &file_path : async_file_pool) {
-        if (file_path.find(specific_dump_dir) != std::string::npos &&
-            file_path.find(prefix_dump_to_check) != std::string::npos &&
-            file_path.find(slot_string_to_check) != std::string::npos) {
-          matched_paths.push_back(file_path);
-          found = true;
-        }
-      }
-      if (found) {
-        shape.clear();
-        std::string result_path = GetNewestFilePath(matched_paths);
-        ReadTensorFromNpy(result_path, &type_name, &data_size, &shape, &buffer);
-        AddToTensorData(backend_name[i], slot[i], iteration[i], device_id[i], root_graph_id[i], is_output[i], data_size,
-                        type_name, shape, buffer, result_list);
-      } else {
-        // If no npy file is found, add empty tensor data.
-        AddToTensorData(backend_name[i], slot[i], iteration[i], device_id[i], root_graph_id[i], is_output[i], 0,
-                        type_name, shape, buffer, result_list);
-        MS_LOG(INFO) << "Target tensor has not been found.";
-      }
+      ReadDumpedTensorAsync(specific_dump_dir, prefix_dump_to_check, slot_string_to_check, backend_name[i], slot[i],
+                            device_id[i], iteration[i], root_graph_id[i], is_output[i], async_file_pool, result_list);
     }
   }
 }

+void DebugServices::ReadDumpedTensorSync(const std::string &prefix_dump_file_name, const std::string &specific_dump_dir,
+                                         const std::string &backend_name, size_t slot, unsigned int device_id,
+                                         unsigned int iteration, unsigned int root_graph_id, const bool &is_output,
+                                         std::vector<std::shared_ptr<TensorData>> *result_list) {
+  std::vector<char> *buffer = NULL;
+  std::string type_name = "";
+  std::vector<int64_t> shape;
+  uint64_t data_size = 0;
+  std::string abspath = RealPath(specific_dump_dir);
+  DIR *d = opendir(abspath.c_str());
+  bool found_file = false;
+  std::vector<std::string> matched_paths;
+  if (d == nullptr) {
+    MS_LOG(ERROR) << "Directory " << specific_dump_dir << " does not exist!";
+    return;
+  }
+  struct dirent *dir = nullptr;
+  while ((dir = readdir(d)) != NULL) {
+    if (dir->d_type == DT_REG) {
+      std::string file_name = dir->d_name;
+      std::string stripped_file_name = GetStrippedFilename(file_name);
+      if (stripped_file_name.empty()) {
+        continue;
+      }
+      std::size_t found = stripped_file_name.rfind(prefix_dump_file_name, 0);
+      if (found != 0) {
+        continue;
+      }
+
+      std::string full_path = specific_dump_dir + "/" + file_name;
+      matched_paths.push_back(full_path);
+      found_file = true;
+    }
+  }
+
+  if (found_file) {
+    shape.clear();
+    std::string result_path = GetNewestFilePath(matched_paths);
+    ReadTensorFromNpy(result_path, &type_name, &data_size, &shape, &buffer);
+    AddToTensorData(backend_name, slot, iteration, device_id, root_graph_id, is_output, data_size, type_name, shape,
+                    buffer, result_list);
+  } else {
+    AddToTensorData(backend_name, slot, iteration, device_id, root_graph_id, is_output, 0, type_name, shape, buffer,
+                    result_list);
+    MS_LOG(INFO) << "Target tensor has not been found.";
+  }
+  (void)closedir(d);
+}
+
+void DebugServices::ReadDumpedTensorAsync(const std::string &specific_dump_dir, const std::string &prefix_dump_to_check,
+                                          const std::string &slot_string_to_check, const std::string &backend_name,
+                                          size_t slot, unsigned int device_id, unsigned int iteration,
+                                          unsigned int root_graph_id, const bool &is_output,
+                                          const std::vector<std::string> &async_file_pool,
+                                          std::vector<std::shared_ptr<TensorData>> *result_list) {
+  std::vector<char> *buffer = NULL;
+  std::string type_name = "";
+  std::vector<int64_t> shape;
+  uint64_t data_size = 0;
+  bool found = false;
+  std::vector<std::string> matched_paths;
+  // if async mode
+  for (const std::string &file_path : async_file_pool) {
+    if (file_path.find(specific_dump_dir) != std::string::npos &&
+        file_path.find(prefix_dump_to_check) != std::string::npos &&
+        file_path.find(slot_string_to_check) != std::string::npos) {
+      matched_paths.push_back(file_path);
+      found = true;
+    }
+  }
+  if (found) {
+    shape.clear();
+    std::string result_path = GetNewestFilePath(matched_paths);
+    ReadTensorFromNpy(result_path, &type_name, &data_size, &shape, &buffer);
+    AddToTensorData(backend_name, slot, iteration, device_id, root_graph_id, is_output, data_size, type_name, shape,
+                    buffer, result_list);
+  } else {
+    // If no npy file is found, add empty tensor data.
+    AddToTensorData(backend_name, slot, iteration, device_id, root_graph_id, is_output, 0, type_name, shape, buffer,
+                    result_list);
+    MS_LOG(INFO) << "Target tensor has not been found.";
+  }
+}
+
 std::string DebugServices::GetStrippedFilename(const std::string &file_name) {
   // strip off the task_id, stream_id, and timestamp, then compare
   size_t first_dot = file_name.find(".");

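The refactoring above splits the old inline body of ReadDumpedTensor into ReadDumpedTensorSync (directory scan with GetStrippedFilename prefix matching) and ReadDumpedTensorAsync (substring matching over the pre-collected async file pool), both ending with GetNewestFilePath over the matches. An illustrative Python mirror of the async selection follows; pick_async_dump_file and the mtime-based "newest" choice are assumptions for the sketch, not names or behaviour taken from the change:

    import os

    def pick_async_dump_file(async_file_pool, specific_dump_dir, prefix_dump_to_check, slot_string_to_check):
        matched_paths = [p for p in async_file_pool
                         if specific_dump_dir in p and prefix_dump_to_check in p and slot_string_to_check in p]
        if not matched_paths:
            # the C++ code adds empty tensor data and logs "Target tensor has not been found."
            return None
        return max(matched_paths, key=os.path.getmtime)  # stand-in for GetNewestFilePath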
@@ -186,6 +186,45 @@ class DebugServices {
     }
   };

+  struct TensorStat {
+    TensorStat(uint64_t data_size, int dtype, const std::vector<int64_t> &shape, bool is_bool, double max_value,
+               double min_value, double avg_value, int count, int neg_zero_count, int pos_zero_count, int nan_count,
+               int neg_inf_count, int pos_inf_count, int zero_count)
+        : data_size(data_size),
+          dtype(dtype),
+          shape(shape),
+          is_bool(is_bool),
+          max_value(max_value),
+          min_value(min_value),
+          avg_value(avg_value),
+          count(count),
+          neg_zero_count(neg_zero_count),
+          pos_zero_count(pos_zero_count),
+          nan_count(nan_count),
+          neg_inf_count(neg_inf_count),
+          pos_inf_count(pos_inf_count),
+          zero_count(zero_count) {}
+
+    TensorStat() = default;
+
+    uint64_t data_size = 0;
+    int dtype = 0;
+    std::vector<int64_t> shape = {0};
+    bool is_bool = false;
+    double max_value = std::numeric_limits<double>::lowest();
+    double min_value = std::numeric_limits<double>::max();
+    double avg_value = 0.0;
+    int count = 0;
+    int neg_zero_count = 0;
+    int pos_zero_count = 0;
+    int nan_count = 0;
+    int neg_inf_count = 0;
+    int pos_inf_count = 0;
+    int zero_count = 0;
+  };
+
+  TensorStat GetTensorStatistics(const std::shared_ptr<TensorData> &tensor);
+
   void AddWatchpoint(
     unsigned int id, unsigned int watch_condition, float parameter,
     const std::vector<std::tuple<std::string, bool>> &check_node_list, const std::vector<parameter_t> &parameter_list,

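A note on the TensorStat defaults above: max_value starts at the lowest representable double and min_value at the largest, so the first real element replaces both sentinels, and the defaults also double as the "empty statistics" value GetTensorStatistics returns for missing tensors. A tiny sketch of the same running min/max idea, using infinities as stand-ins for the numeric_limits sentinels:

    max_value, min_value = float("-inf"), float("inf")  # stand-ins for lowest()/max()
    for v in [0.5, -2.0, 3.0]:
        max_value = max(max_value, v)
        min_value = min(min_value, v)
    print(max_value, min_value)  # 3.0 -2.0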
@@ -233,6 +272,17 @@ class DebugServices {
                         const std::vector<std::string> &async_file_pool,
                         std::vector<std::shared_ptr<TensorData>> *result_list);

+  void ReadDumpedTensorSync(const std::string &prefix_dump_file_name, const std::string &specific_dump_dir,
+                            const std::string &backend_name, size_t slot, unsigned int device_id,
+                            unsigned int iteration, unsigned int root_graph_id, const bool &is_output,
+                            std::vector<std::shared_ptr<TensorData>> *result_list);
+
+  void ReadDumpedTensorAsync(const std::string &specific_dump_dir, const std::string &prefix_dump_to_check,
+                             const std::string &slot_string_to_check, const std::string &backend_name, size_t slot,
+                             unsigned int device_id, unsigned int iteration, unsigned int root_graph_id,
+                             const bool &is_output, const std::vector<std::string> &async_file_pool,
+                             std::vector<std::shared_ptr<TensorData>> *result_list);
+
   std::vector<std::shared_ptr<TensorData>> ReadNeededDumpedTensors(unsigned int iteration,
                                                                    std::vector<std::string> *async_file_pool);

@@ -226,7 +226,7 @@ unsigned int GetTensorSlot(tensor_info_t info) { return info.slot; }

 bool GetTensorIsOutput(tensor_info_t info) { return info.is_output; }

-std::vector<tensor_data_t> DbgServices::ReadTensors(std::vector<tensor_info_t> info) {
+std::vector<std::shared_ptr<TensorData>> DbgServices::ReadTensorsUtil(std::vector<tensor_info_t> info) {
   for (auto i : info) {
     MS_LOG(INFO) << "cpp DbgServices ReadTensor info name " << i.node_name << ", slot " << i.slot << ", iteration "
                  << i.iteration << ", rank_id " << i.rank_id << ", root_graph_id " << i.root_graph_id << ", is_output "

@@ -238,7 +238,6 @@ std::vector<tensor_data_t> DbgServices::ReadTensors(std::vector<tensor_info_t> i
   std::vector<unsigned int> iteration;
   std::vector<size_t> slot;
   std::vector<std::shared_ptr<TensorData>> result_list;
-  std::vector<tensor_data_t> tensors_read;
   std::vector<bool> is_output;

   std::transform(info.begin(), info.end(), std::back_inserter(backend_name), GetTensorFullName);

@@ -264,10 +263,60 @@ std::vector<tensor_data_t> DbgServices::ReadTensors(std::vector<tensor_info_t> i
   MS_LOG(INFO) << "ReadTensors Took: " << ms_double.count() / 1000 << "s";
   MS_LOG(INFO) << "cpp after";

+  return result_list;
+}
+
+std::vector<tensor_data_t> DbgServices::ReadTensors(std::vector<tensor_info_t> info) {
+  std::vector<tensor_data_t> tensors_read;
+  std::vector<std::shared_ptr<TensorData>> result_list;
+  result_list = ReadTensorsUtil(info);
   for (auto result : result_list) {
     tensor_data_t tensor_data_item(result->GetDataPtr(), result->GetByteSize(), result->GetType(), result->GetShape());
     tensors_read.push_back(tensor_data_item);
   }
-  MS_LOG(INFO) << "cpp end";
   return tensors_read;
 }
+
+std::vector<TensorBaseData> DbgServices::ReadTensorsBase(std::vector<tensor_info_t> info) {
+  std::vector<TensorBaseData> tensors_read_base;
+  std::vector<std::shared_ptr<TensorData>> result_list;
+  result_list = ReadTensorsUtil(info);
+  for (auto result : result_list) {
+    if (!result->GetByteSize()) {
+      // tensor not found, adding empty tensor base.
+      TensorBaseData tensor_data_item(0, 0, {0});
+      tensors_read_base.push_back(tensor_data_item);
+      continue;
+    }
+    TensorBaseData tensor_data_item(result->GetByteSize(), result->GetType(), result->GetShape());
+    tensors_read_base.push_back(tensor_data_item);
+  }
+  return tensors_read_base;
+}
+
+std::vector<TensorStatData> DbgServices::ReadTensorsStat(std::vector<tensor_info_t> info) {
+  std::vector<TensorStatData> tensors_read_stat;
+  std::vector<std::shared_ptr<TensorData>> result_list;
+  result_list = ReadTensorsUtil(info);
+  for (auto result : result_list) {
+    if (!result->GetByteSize()) {
+      DebugServices::TensorStat tensor_statistics;
+      TensorStatData tensor_data_item(
+        tensor_statistics.data_size, tensor_statistics.dtype, tensor_statistics.shape, tensor_statistics.is_bool,
+        tensor_statistics.max_value, tensor_statistics.min_value, tensor_statistics.avg_value, tensor_statistics.count,
+        tensor_statistics.neg_zero_count, tensor_statistics.pos_zero_count, tensor_statistics.nan_count,
+        tensor_statistics.neg_inf_count, tensor_statistics.pos_inf_count, tensor_statistics.zero_count);
+      tensors_read_stat.push_back(tensor_data_item);
+      continue;
+    }
+    DebugServices::TensorStat tensor_statistics = debug_services_->GetTensorStatistics(result);
+    TensorStatData tensor_data_item(
+      tensor_statistics.data_size, tensor_statistics.dtype, tensor_statistics.shape, tensor_statistics.is_bool,
+      tensor_statistics.max_value, tensor_statistics.min_value, tensor_statistics.avg_value, tensor_statistics.count,
+      tensor_statistics.neg_zero_count, tensor_statistics.pos_zero_count, tensor_statistics.nan_count,
+      tensor_statistics.neg_inf_count, tensor_statistics.pos_inf_count, tensor_statistics.zero_count);
+    tensors_read_stat.push_back(tensor_data_item);
+  }
+
+  return tensors_read_stat;
+}

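With this refactoring, ReadTensors, ReadTensorsBase and ReadTensorsStat all delegate the dump lookup to the shared ReadTensorsUtil and differ only in what they hand back to Python. Reusing d_init and info from the first sketch, the three levels of detail look like this (a sketch, not output from the change):

    tensors = d_init.read_tensors([info])     # TensorData: full payload plus dtype/shape
    bases = d_init.read_tensor_base([info])   # TensorBaseData: data_size, dtype, shape only
    stats = d_init.read_tensor_stats([info])  # TensorStatData: base info plus min/max/avg and counters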
@@ -117,6 +117,68 @@ struct tensor_data_t {
   std::vector<int64_t> shape;
 };

+struct TensorBaseData {
+  TensorBaseData(uint64_t data_size, int dtype, const std::vector<int64_t> &shape)
+      : data_size_(data_size), dtype_(dtype), shape_(shape) {}
+
+  const uint64_t data_size() const { return data_size_; }
+  const int dtype() const { return dtype_; }
+  const std::vector<int64_t> &shape() const { return shape_; }
+  uint64_t data_size_;
+  int dtype_;
+  std::vector<int64_t> shape_;
+};
+
+struct TensorStatData {
+  TensorStatData(uint64_t data_size, int dtype, const std::vector<int64_t> &shape, bool is_bool, double max_value,
+                 double min_value, double avg_value, int count, int neg_zero_count, int pos_zero_count, int nan_count,
+                 int neg_inf_count, int pos_inf_count, int zero_count)
+      : data_size_(data_size),
+        dtype_(dtype),
+        shape_(shape),
+        is_bool_(is_bool),
+        max_value_(max_value),
+        min_value_(min_value),
+        avg_value_(avg_value),
+        count_(count),
+        neg_zero_count_(neg_zero_count),
+        pos_zero_count_(pos_zero_count),
+        nan_count_(nan_count),
+        neg_inf_count_(neg_inf_count),
+        pos_inf_count_(pos_inf_count),
+        zero_count_(zero_count) {}
+
+  const uint64_t data_size() const { return data_size_; }
+  const int dtype() const { return dtype_; }
+  const std::vector<int64_t> &shape() const { return shape_; }
+  const bool is_bool() const { return is_bool_; }
+  const double max_value() const { return max_value_; }
+  const double min_value() const { return min_value_; }
+  const double avg_value() const { return avg_value_; }
+  const int count() const { return count_; }
+  const int neg_zero_count() const { return neg_zero_count_; }
+  const int pos_zero_count() const { return pos_zero_count_; }
+  const int nan_count() const { return nan_count_; }
+  const int neg_inf_count() const { return neg_inf_count_; }
+  const int pos_inf_count() const { return pos_inf_count_; }
+  const int zero_count() const { return zero_count_; }
+
+  uint64_t data_size_;
+  int dtype_;
+  std::vector<int64_t> shape_;
+  bool is_bool_;
+  double max_value_;
+  double min_value_;
+  double avg_value_;
+  int count_;
+  int neg_zero_count_;
+  int pos_zero_count_;
+  int nan_count_;
+  int neg_inf_count_;
+  int pos_inf_count_;
+  int zero_count_;
+};
+
 class DbgServices {
  private:
   DebugServices *debug_services_;

@@ -141,8 +203,14 @@ class DbgServices {

   std::vector<watchpoint_hit_t> CheckWatchpoints(unsigned int iteration);

+  std::vector<std::shared_ptr<TensorData>> ReadTensorsUtil(std::vector<tensor_info_t> info);
+
   std::vector<tensor_data_t> ReadTensors(std::vector<tensor_info_t> info);

+  std::vector<TensorBaseData> ReadTensorsBase(std::vector<tensor_info_t> info);
+
+  std::vector<TensorStatData> ReadTensorsStat(std::vector<tensor_info_t> info);
+
   std::string GetVersion();
 };

@@ -27,6 +27,8 @@ PYBIND11_MODULE(_mindspore_offline_debug, m) {
     .def("RemoveWatchpoint", &DbgServices::RemoveWatchpoint)
     .def("CheckWatchpoints", &DbgServices::CheckWatchpoints)
     .def("ReadTensors", &DbgServices::ReadTensors)
+    .def("ReadTensorsBase", &DbgServices::ReadTensorsBase)
+    .def("ReadTensorsStat", &DbgServices::ReadTensorsStat)
     .def("GetVersion", &DbgServices::GetVersion);

   py::class_<parameter_t>(m, "parameter")

@@ -63,4 +65,28 @@ PYBIND11_MODULE(_mindspore_offline_debug, m) {
     .def("get_data_size", &tensor_data_t::get_data_size)
     .def("get_dtype", &tensor_data_t::get_dtype)
     .def("get_shape", &tensor_data_t::get_shape);
+
+  py::class_<TensorBaseData>(m, "TensorBaseData")
+    .def(py::init<uint64_t, int, std::vector<int64_t>>())
+    .def("data_size", &TensorBaseData::data_size)
+    .def("dtype", &TensorBaseData::dtype)
+    .def("shape", &TensorBaseData::shape);
+
+  py::class_<TensorStatData>(m, "TensorStatData")
+    .def(
+      py::init<uint64_t, int, std::vector<int64_t>, bool, double, double, double, int, int, int, int, int, int, int>())
+    .def("data_size", &TensorStatData::data_size)
+    .def("dtype", &TensorStatData::dtype)
+    .def("shape", &TensorStatData::shape)
+    .def("is_bool", &TensorStatData::is_bool)
+    .def("max_value", &TensorStatData::max_value)
+    .def("min_value", &TensorStatData::min_value)
+    .def("avg_value", &TensorStatData::avg_value)
+    .def("count", &TensorStatData::count)
+    .def("neg_zero_count", &TensorStatData::neg_zero_count)
+    .def("pos_zero_count", &TensorStatData::pos_zero_count)
+    .def("nan_count", &TensorStatData::nan_count)
+    .def("neg_inf_count", &TensorStatData::neg_inf_count)
+    .def("pos_inf_count", &TensorStatData::pos_inf_count)
+    .def("zero_count", &TensorStatData::zero_count);
 }

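The pybind11 registrations above are what the Python wrappers later in this diff call into: TensorBaseData(data_size, dtype, shape) and the 14-argument TensorStatData constructor, each with read-only accessor methods. As a sketch, the wrapper's __init__ boils down to something like the following; in normal use you would go through dbg_services rather than the bound module directly:

    import mindspore._mindspore_offline_debug as cds

    raw = cds.TensorBaseData(4, 0, [2, 2])            # same call the Python TensorBaseData wrapper makes
    print(raw.data_size(), raw.dtype(), raw.shape())  # bound accessor methods, not properties at this level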
@@ -20,6 +20,7 @@
 #include <memory>
 #include <bitset>
 #include <tuple>
+#include <type_traits>
 #include "debug/debugger/tensor_summary.h"

 #ifdef OFFLINE_DBG_MODE

@@ -92,39 +93,45 @@ double VarianceAndMeanCalculator::GetStandardDeviation() { return sqrt(GetVarian

 template <typename T>
 TensorSummary<T>::TensorSummary(void *current_tensor_ptr, void *const previous_tensor_ptr, uint32_t num_elements)
-    : current_tensor_ptr(reinterpret_cast<T *>(current_tensor_ptr)),
-      prev_tensor_ptr(reinterpret_cast<T *>(previous_tensor_ptr)),
-      num_elements(num_elements),
-      min(std::numeric_limits<double>::max()),
-      max(std::numeric_limits<double>::lowest()),
-      inf_count(0),
-      nan_count(0),
-      zero_count(0),
-      epsilon(1.0e-9),
-      mean_sd_cal_enabled(false) {}
+    : current_tensor_ptr_(reinterpret_cast<T *>(current_tensor_ptr)),
+      prev_tensor_ptr_(reinterpret_cast<T *>(previous_tensor_ptr)),
+      num_elements_(num_elements),
+      min_(std::numeric_limits<double>::max()),
+      max_(std::numeric_limits<double>::lowest()),
+      avg_(0.0),
+      is_bool_(false),
+      neg_zero_count_(0),
+      pos_zero_count_(0),
+      pos_inf_count_(0),
+      neg_inf_count_(0),
+      inf_count_(0),
+      nan_count_(0),
+      zero_count_(0),
+      epsilon_(1.0e-9),
+      mean_sd_cal_enabled_(false) {}

 template <typename T>
 void TensorSummary<T>::SummarizeTensor(const std::vector<DebugServices::watchpoint_t> &wps) {
   InitCalculators(wps);
-  for (size_t i = 0; i < num_elements; ++i) {
-    auto current_value = static_cast<double>(current_tensor_ptr[i]);
+  for (size_t i = 0; i < num_elements_; ++i) {
+    auto current_value = static_cast<double>(current_tensor_ptr_[i]);
     double previous_value =
-      prev_tensor_ptr ? static_cast<double>(prev_tensor_ptr[i]) : std::numeric_limits<double>::quiet_NaN();
-    inf_count += std::isinf(current_value);
-    nan_count += std::isnan(current_value);
-    zero_count += (current_value == 0);
-    max = std::max(max, current_value);
-    min = std::min(min, current_value);
-    if (mean_sd_cal_enabled) {
-      current_mean_variance.ProcessElement(current_value);
+      prev_tensor_ptr_ ? static_cast<double>(prev_tensor_ptr_[i]) : std::numeric_limits<double>::quiet_NaN();
+    inf_count_ += std::isinf(current_value);
+    nan_count_ += std::isnan(current_value);
+    zero_count_ += (current_value == 0);
+    max_ = std::max(max_, current_value);
+    min_ = std::min(min_, current_value);
+    if (mean_sd_cal_enabled_) {
+      current_mean_variance_.ProcessElement(current_value);
     }
-    for (auto &it : all_close) {
+    for (auto &it : all_close_) {
       it.second->ProcessElement(current_value, previous_value);
     }
-    for (auto &range_count : range_counts) {
+    for (auto &range_count : range_counts_) {
       range_count.second->ProcessElement(current_value);
     }
-    for (auto &mean : means) {
+    for (auto &mean : means_) {
       if (mean.first == "curr_prev_diff_mean") {
         mean.second->ProcessElement(std::abs(current_value - previous_value));
       } else if (mean.first == "abs_prev_mean") {

@@ -136,6 +143,39 @@ void TensorSummary<T>::SummarizeTensor(const std::vector<DebugServices::watchpoi
   }
 }

+template <typename T>
+void TensorSummary<T>::TensorStatistics(DbgDataType dtype_value) {
+  if (dtype_value == DT_BOOL) {
+    is_bool_ = true;
+  }
+  double sum_elements = 0.0;
+  for (size_t i = 0; i < num_elements_; ++i) {
+    auto current_value = static_cast<double>(current_tensor_ptr_[i]);
+    if (std::isinf(current_value)) {
+      if (current_value > 0) {
+        pos_inf_count_ += 1;
+      } else {
+        neg_inf_count_ += 1;
+      }
+    }
+    zero_count_ += (current_value == 0);
+    nan_count_ += std::isnan(current_value);
+    if (!(std::isnan(current_value) || std::isinf(current_value))) {
+      // only considering tensor elements with value
+      if (std::signbit(current_value) && !(current_value == 0)) {
+        neg_zero_count_ += 1;
+      } else if (!(current_value == 0)) {
+        pos_zero_count_ += 1;
+      }
+      max_ = std::max(max_, current_value);
+      min_ = std::min(min_, current_value);
+      sum_elements += current_value;
+    }
+  }
+  int value_count = zero_count_ + neg_zero_count_ + pos_zero_count_;
+  avg_ = sum_elements / value_count;
+}
+
 template <typename T>
 std::tuple<bool, int, std::vector<DebugServices::parameter_t>> TensorSummary<T>::IsWatchpointHit(
   DebugServices::watchpoint_t wp) {

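To make the counter semantics of TensorSummary<T>::TensorStatistics above explicit: neg_zero_count and pos_zero_count count negative and positive finite non-zero elements, zero_count counts (finite) zeros, and avg_ is the sum of finite elements divided by the number of finite elements. The following is an illustrative Python re-implementation under those rules, not the shipped code; the zero-element guard on the average is added here and is not part of the hunk.

    import math

    def tensor_statistics(values):
        pos_inf = neg_inf = nan = zero = neg = pos = 0
        max_v, min_v, total = float("-inf"), float("inf"), 0.0
        for v in values:
            if math.isinf(v):
                if v > 0:
                    pos_inf += 1
                else:
                    neg_inf += 1
            zero += v == 0
            nan += math.isnan(v)
            if not (math.isnan(v) or math.isinf(v)):
                if math.copysign(1.0, v) < 0 and v != 0:  # mirrors std::signbit
                    neg += 1
                elif v != 0:
                    pos += 1
                max_v = max(max_v, v)
                min_v = min(min_v, v)
                total += v
        count = zero + neg + pos  # finite elements; the C++ code divides by this value
        avg = total / count if count else float("nan")
        return {"max": max_v, "min": min_v, "avg": avg, "neg_zero_count": neg, "pos_zero_count": pos,
                "zero_count": zero, "nan_count": nan, "pos_inf_count": pos_inf, "neg_inf_count": neg_inf}

    print(tensor_statistics([1.0, -2.5, 0.0, float("nan"), float("inf")]))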
@@ -145,24 +185,24 @@ std::tuple<bool, int, std::vector<DebugServices::parameter_t>> TensorSummary<T>:
   std::bitset<bit_size> error_code;
   CONDITION_TYPE type = wp.condition.type;
   // bit 0 denotes presence of nan
-  error_code.set(0, nan_count > 0);
+  error_code.set(0, nan_count_ > 0);
   // bit 1 denotes presence of inf
-  error_code.set(1, inf_count > 0);
+  error_code.set(1, inf_count_ > 0);

   if (type == CONDITION_TYPE::HAS_NAN) {
     error_code.reset();
-    hit = nan_count > 0;
+    hit = nan_count_ > 0;
   } else if (type == CONDITION_TYPE::HAS_INF) {
     error_code.reset();
-    hit = inf_count > 0;
+    hit = inf_count_ > 0;
   } else if (type == CONDITION_TYPE::GENERAL_OVERFLOW) {
     error_code.reset();
-    hit = (nan_count + inf_count) > 0;
-  } else if (type == CONDITION_TYPE::NOT_CHANGED && prev_tensor_ptr && error_code.none()) {
-    hit = all_close[wp.id]->IsAllClose();
+    hit = (nan_count_ + inf_count_) > 0;
+  } else if (type == CONDITION_TYPE::NOT_CHANGED && prev_tensor_ptr_ && error_code.none()) {
+    hit = all_close_[wp.id]->IsAllClose();
   } else if ((type == CONDITION_TYPE::NOT_CHANGED || type == CONDITION_TYPE::CHANGE_TOO_LARGE ||
               type == CONDITION_TYPE::CHANGE_TOO_SMALL) &&
-             !prev_tensor_ptr) {
+             !prev_tensor_ptr_) {
     // bit 2 denotes absence of previous tensor
     error_code.set(2, true);
   }

@@ -196,26 +236,26 @@ double_t TensorSummary<T>::StatLookup(const std::string &parameter_name, const D
   }

   if (param_type == "max") {
-    return max;
+    return max_;
   } else if (param_type == "min") {
-    return min;
+    return min_;
   } else if (param_type == "max_min") {
-    return max - min;
+    return max_ - min_;
   } else if (param_type == "mean") {
-    return current_mean_variance.GetMean();
+    return current_mean_variance_.GetMean();
   } else if (param_type == "sd") {
-    return current_mean_variance.GetStandardDeviation();
+    return current_mean_variance_.GetStandardDeviation();
   } else if (param_type == "abs_mean") {
-    if (means.find("abs_current_mean") != means.end()) {
-      return means["abs_current_mean"]->GetMean();
+    if (means_.find("abs_current_mean") != means_.end()) {
+      return means_["abs_current_mean"]->GetMean();
     }
-  } else if (param_type == "abs_mean_update_ratio" && prev_tensor_ptr) {
-    if (means.find("curr_prev_diff_mean") != means.end() && means.find("abs_prev_mean") != means.end()) {
-      return means["curr_prev_diff_mean"]->GetMean() / (means["abs_prev_mean"]->GetMean() + epsilon);
+  } else if (param_type == "abs_mean_update_ratio" && prev_tensor_ptr_) {
+    if (means_.find("curr_prev_diff_mean") != means_.end() && means_.find("abs_prev_mean") != means_.end()) {
+      return means_["curr_prev_diff_mean"]->GetMean() / (means_["abs_prev_mean"]->GetMean() + epsilon_);
     }
   } else if (param_type == "range_percentage") {
-    if (range_counts.find(wp.id) != range_counts.end()) {
-      return range_counts[wp.id]->GetPercentInRange();
+    if (range_counts_.find(wp.id) != range_counts_.end()) {
+      return range_counts_[wp.id]->GetPercentInRange();
     }
   } else if (param_type == "zero_percentage") {
     return GetZeroValPercent();

@@ -227,54 +267,54 @@ template <typename T>
 double_t TensorSummary<T>::StatLookup(const DebugServices::watchpoint_t &wp) {
   CONDITION_TYPE type = wp.condition.type;
   if (type == CONDITION_TYPE::MAX_LT || type == CONDITION_TYPE::MAX_GT) {
-    return max;
+    return max_;
   } else if (type == CONDITION_TYPE::MIN_LT || type == CONDITION_TYPE::MIN_GT) {
-    return min;
+    return min_;
   } else if (type == CONDITION_TYPE::MEAN_LT || type == CONDITION_TYPE::MEAN_GT) {
-    return current_mean_variance.GetMean();
+    return current_mean_variance_.GetMean();
   } else if (type == CONDITION_TYPE::SD_LT || type == CONDITION_TYPE::SD_GT) {
-    return current_mean_variance.GetStandardDeviation();
+    return current_mean_variance_.GetStandardDeviation();
   } else if (type == CONDITION_TYPE::MAX_MIN_GT || type == CONDITION_TYPE::MAX_MIN_LT) {
-    return max - min;
+    return max_ - min_;
   }
   return std::numeric_limits<double_t>::quiet_NaN();
 }

 template <typename T>
 double_t TensorSummary<T>::GetZeroValPercent() {
-  if (num_elements == 0) {
+  if (num_elements_ == 0) {
     return 0;
   }

-  return (zero_count * 100.0) / num_elements;
+  return (zero_count_ * 100.0) / num_elements_;
 }

 template <typename T>
 void TensorSummary<T>::InitCalculators(const std::vector<DebugServices::watchpoint_t> &wps) {
   for (auto &wp : wps) {
     auto wp_id = wp.id;
-    mean_sd_cal_enabled = mean_sd_cal_enabled || wp.mean_sd_enabled();
-    if (wp.allclose_enabled() && prev_tensor_ptr) {
-      all_close[wp_id] = std::make_unique<AllCloseCalculator>();
+    mean_sd_cal_enabled_ = mean_sd_cal_enabled_ || wp.mean_sd_enabled();
+    if (wp.allclose_enabled() && prev_tensor_ptr_) {
+      all_close_[wp_id] = std::make_unique<AllCloseCalculator>();
       if (!wp.parameter_list[0].disabled) {
-        all_close[wp_id]->set_atol(wp.parameter_list[0].value);
+        all_close_[wp_id]->set_atol(wp.parameter_list[0].value);
       }
       if (!wp.parameter_list[1].disabled) {
-        all_close[wp_id]->set_rtol(wp.parameter_list[1].value);
+        all_close_[wp_id]->set_rtol(wp.parameter_list[1].value);
       }
     } else if (wp.range_enabled()) {
-      range_counts[wp_id] = std::make_unique<RangeCountCalculator>();
+      range_counts_[wp_id] = std::make_unique<RangeCountCalculator>();
       if (!wp.parameter_list[0].disabled) {
-        range_counts[wp_id]->set_range_start_inclusive(wp.parameter_list[0].value);
+        range_counts_[wp_id]->set_range_start_inclusive(wp.parameter_list[0].value);
       }
       if (!wp.parameter_list[1].disabled) {
-        range_counts[wp_id]->set_range_end_inclusive(wp.parameter_list[1].value);
+        range_counts_[wp_id]->set_range_end_inclusive(wp.parameter_list[1].value);
       }
-    } else if (wp.tensor_update_ratio_mean_enabled() && prev_tensor_ptr) {
-      means.insert({"curr_prev_diff_mean", std::make_unique<MeanCalculator>()});
-      means.insert({"abs_prev_mean", std::make_unique<MeanCalculator>()});
+    } else if (wp.tensor_update_ratio_mean_enabled() && prev_tensor_ptr_) {
+      means_.insert({"curr_prev_diff_mean", std::make_unique<MeanCalculator>()});
+      means_.insert({"abs_prev_mean", std::make_unique<MeanCalculator>()});
     } else if (wp.abs_mean_enabled()) {
-      means.insert({"abs_current_mean", std::make_unique<MeanCalculator>()});
+      means_.insert({"abs_current_mean", std::make_unique<MeanCalculator>()});
     }
   }
 }

@@ -92,6 +92,18 @@ class ITensorSummary {
   virtual void SummarizeTensor(const std::vector<DebugServices::watchpoint_t> &) = 0;
   virtual std::tuple<bool, int32_t, std::vector<DebugServices::parameter_t>> IsWatchpointHit(
     DebugServices::watchpoint_t) = 0;
+  virtual void TensorStatistics(DbgDataType) = 0;
+  virtual const bool is_bool() const = 0;
+  virtual const double max_value() const = 0;
+  virtual const double min_value() const = 0;
+  virtual const double avg_value() const = 0;
+  virtual const int count() const = 0;
+  virtual const int neg_zero_count() const = 0;
+  virtual const int pos_zero_count() const = 0;
+  virtual const int nan_count() const = 0;
+  virtual const int neg_inf_count() const = 0;
+  virtual const int pos_inf_count() const = 0;
+  virtual const int zero_count() const = 0;
 };

 template <typename T>

@@ -103,22 +115,40 @@ class TensorSummary : public ITensorSummary {
   void SummarizeTensor(const std::vector<DebugServices::watchpoint_t> &) override;
   // returns hit, error_code, parameter_list
   std::tuple<bool, int, std::vector<DebugServices::parameter_t>> IsWatchpointHit(DebugServices::watchpoint_t) override;
+  void TensorStatistics(DbgDataType) override;
+  const bool is_bool() const override { return is_bool_; }
+  const double max_value() const override { return max_; }
+  const double min_value() const override { return min_; }
+  const double avg_value() const override { return avg_; }
+  const int count() const override { return num_elements_; }
+  const int neg_zero_count() const override { return neg_zero_count_; }
+  const int pos_zero_count() const override { return pos_zero_count_; }
+  const int nan_count() const override { return nan_count_; }
+  const int neg_inf_count() const override { return neg_inf_count_; }
+  const int pos_inf_count() const override { return pos_inf_count_; }
+  const int zero_count() const override { return zero_count_; }
+
  private:
-  T *current_tensor_ptr;
-  T *prev_tensor_ptr;
-  uint32_t num_elements;
-  double min;
-  double max;
-  uint32_t inf_count;
-  uint32_t nan_count;
-  uint32_t zero_count;
-  double epsilon;
-  bool mean_sd_cal_enabled;
-  VarianceAndMeanCalculator current_mean_variance;
-  std::unordered_map<std::string, std::unique_ptr<MeanCalculator>> means;
-  std::unordered_map<uint32_t, std::unique_ptr<AllCloseCalculator>> all_close;
-  std::unordered_map<uint32_t, std::unique_ptr<RangeCountCalculator>> range_counts;
+  T *current_tensor_ptr_;
+  T *prev_tensor_ptr_;
+  uint32_t num_elements_;
+  double min_;
+  double max_;
+  double avg_;
+  bool is_bool_;
+  uint32_t neg_zero_count_;
+  uint32_t pos_zero_count_;
+  uint32_t pos_inf_count_;
+  uint32_t neg_inf_count_;
+  uint32_t inf_count_;
+  uint32_t nan_count_;
+  uint32_t zero_count_;
+  double epsilon_;
+  bool mean_sd_cal_enabled_;
+  VarianceAndMeanCalculator current_mean_variance_;
+  std::unordered_map<std::string, std::unique_ptr<MeanCalculator>> means_;
+  std::unordered_map<uint32_t, std::unique_ptr<AllCloseCalculator>> all_close_;
+  std::unordered_map<uint32_t, std::unique_ptr<RangeCountCalculator>> range_counts_;
   double_t StatLookup(const DebugServices::watchpoint_t &);
   double_t StatLookup(const std::string &, const DebugServices::watchpoint_t &);
   double_t GetZeroValPercent();

@@ -17,7 +17,10 @@ The module DbgServices provides offline debugger APIs.
 """

 import mindspore._mindspore_offline_debug as cds
-from mindspore.offline_debug.mi_validators import check_init, check_initialize, check_add_watchpoint, check_remove_watchpoint, check_check_watchpoints, check_read_tensors, check_initialize_done, check_tensor_info_init, check_tensor_data_init, check_watchpoint_hit_init, check_parameter_init
+from mindspore.offline_debug.mi_validators import check_init, check_initialize, check_add_watchpoint,\
+    check_remove_watchpoint, check_check_watchpoints, check_read_tensor_info, check_initialize_done, \
+    check_tensor_info_init, check_tensor_data_init, check_tensor_base_data_init, check_tensor_stat_data_init,\
+    check_watchpoint_hit_init, check_parameter_init
 from mindspore.offline_debug.mi_validator_helpers import replace_minus_one

@@ -238,7 +241,7 @@ class DbgServices():
         return watchpoint_hit_list

     @check_initialize_done
-    @check_read_tensors
+    @check_read_tensor_info
     def read_tensors(self, info):
         """
         Returning tensor data object describing the tensor requested tensor.

@@ -277,6 +280,83 @@ class DbgServices():
             tensor_data_list_ret.append(tensor_data)
         return tensor_data_list_ret

+    @check_initialize_done
+    @check_read_tensor_info
+    def read_tensor_base(self, info):
+        """
+        Returning tensor base data object describing the requested tensor.
+
+        Args:
+            info (list): List of TensorInfo objects.
+
+        Returns:
+            list, TensorBaseData list.
+
+        Examples:
+            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
+            >>> d = dbg_services.DbgServices(dump_file_path="dump_file_path",
+            >>>                              verbose=True)
+            >>> d_init = d.initialize(is_sync_mode=True)
+            >>> tensor_base_data_list = d_init.read_tensor_base([dbg_services.TensorInfo(node_name="conv2.bias",
+            >>>                                                                          slot=0,
+            >>>                                                                          iteration=8,
+            >>>                                                                          rank_id=5,
+            >>>                                                                          root_graph_id=0,
+            >>>                                                                          is_output=True)])
+        """
+        log("in Python ReadTensorsBase info ", info)
+        info_list_inst = []
+        for elem in info:
+            log("in Python ReadTensorsBase info ", info)
+            info_list_inst.append(elem.instance)
+        tensor_base_data_list = self.dbg_instance.ReadTensorsBase(info_list_inst)
+        tensor_base_data_list_ret = []
+        for elem in tensor_base_data_list:
+            tensor_base_data = TensorBaseData(elem.data_size(), elem.dtype(), elem.shape())
+            tensor_base_data_list_ret.append(tensor_base_data)
+        return tensor_base_data_list_ret
+
+    @check_initialize_done
+    @check_read_tensor_info
+    def read_tensor_stats(self, info):
+        """
+        Returning tensor statistics object describing the requested tensor.
+
+        Args:
+            info (list): List of TensorInfo objects.
+
+        Returns:
+            list, TensorStatData list.
+
+        Examples:
+            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
+            >>> d = dbg_services.DbgServices(dump_file_path="dump_file_path",
+            >>>                              verbose=True)
+            >>> d_init = d.initialize(is_sync_mode=True)
+            >>> tensor_stat_data_list = d_init.read_tensor_stats([dbg_services.TensorInfo(node_name="conv2.bias",
+            >>>                                                                           slot=0,
+            >>>                                                                           iteration=8,
+            >>>                                                                           rank_id=5,
+            >>>                                                                           root_graph_id=0,
+            >>>                                                                           is_output=True)])
+        """
+        log("in Python ReadTensorsStat info ", info)
+        info_list_inst = []
+        for elem in info:
+            log("in Python ReadTensorsStat info ", info)
+            info_list_inst.append(elem.instance)
+        tensor_stat_data_list = self.dbg_instance.ReadTensorsStat(info_list_inst)
+        tensor_stat_data_list_ret = []
+        for elem in tensor_stat_data_list:
+            tensor_stat_data = TensorStatData(elem.data_size(), elem.dtype(),
+                                              elem.shape(), elem.is_bool(),
+                                              elem.max_value(), elem.min_value(),
+                                              elem.avg_value(), elem.count(), elem.neg_zero_count(),
+                                              elem.pos_zero_count(), elem.nan_count(), elem.neg_inf_count(),
+                                              elem.pos_inf_count(), elem.zero_count())
+            tensor_stat_data_list_ret.append(tensor_stat_data)
+        return tensor_stat_data_list_ret
+
 class TensorInfo():
     """
     Tensor Information class.

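One practical use of the new statistics path: scan a set of dumped tensors for NaN/Inf without transferring their full contents. A sketch reusing d_init and info from the first example, and assuming TensorStatData exposes the remaining counters as properties in the same style as data_size (only part of that class is visible in this excerpt):

    for idx, stat in enumerate(d_init.read_tensor_stats([info])):
        bad = stat.nan_count + stat.pos_inf_count + stat.neg_inf_count
        if bad:
            print("tensor #%d: %d NaN/Inf elements out of %d" % (idx, bad, stat.count))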
@ -527,6 +607,406 @@ class TensorData():
|
||||||
|
|
||||||
return self.instance.get_shape()
|
return self.instance.get_shape()
|
||||||
|
|
||||||
|
class TensorBaseData():
|
||||||
|
|
||||||
|
"""
|
||||||
|
TensorBaseData class.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
data_size (int): Size of data in bytes.
|
||||||
|
dtype (int): An encoding representing the type of TensorData.
|
||||||
|
shape (list): Shape of tensor.
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
>>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
|
||||||
|
>>> tensor_base_data = dbg_services.TensorBaseData(data_size=4,
|
||||||
|
>>> dtype=0,
|
||||||
|
>>> shape=[2, 2])
|
||||||
|
"""
|
||||||
|
@check_tensor_base_data_init
|
||||||
|
def __init__(self, data_size, dtype, shape):
|
||||||
|
self.instance = cds.TensorBaseData(data_size, dtype, shape)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def data_size(self):
|
||||||
|
"""
|
||||||
|
Function to receive TensorBaseData data_size.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
int, data_size of TensorBaseData instance.
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
>>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
|
||||||
|
>>> tensor_base_data = dbg_services.TensorBaseData(data_size=4,
|
||||||
|
>>> dtype=0,
|
||||||
|
>>> shape=[2, 2])
|
||||||
|
>>> data_size = tensor_base_data.data_size
|
||||||
|
"""
|
||||||
|
|
||||||
|
return self.instance.data_size()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def dtype(self):
|
||||||
|
"""
|
||||||
|
Function to receive TensorBaseData dtype.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
int, dtype of TensorBaseData instance.
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
>>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
|
||||||
|
>>> tensor_base_data = dbg_services.TensorBaseData(data_size=4,
|
||||||
|
>>> dtype=0,
|
||||||
|
>>> shape=[2, 2])
|
||||||
|
>>> dtype = tensor_base_data.dtype
|
||||||
|
"""
|
||||||
|
|
||||||
|
return self.instance.dtype()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def shape(self):
|
||||||
|
"""
|
||||||
|
Function to receive TensorBaseData shape.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
list, shape of TensorBaseData instance.
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
>>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
|
||||||
|
>>> tensor_base_data = dbg_services.TensorBaseData(data_size=4,
|
||||||
|
>>> dtype=0,
|
||||||
|
>>> shape=[2, 2])
|
||||||
|
>>> shape = tensor_base_data.shape
|
||||||
|
"""
|
||||||
|
|
||||||
|
return self.instance.shape()
|
||||||
|
class TensorStatData():
|
||||||
|
|
||||||
|
"""
|
||||||
|
TensorStatData class.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
data_size (int): Size of data in bytes.
|
||||||
|
dtype (int): An encoding representing the type of TensorData.
|
||||||
|
shape (list): Shape of tensor.
|
||||||
|
is_bool (bool): Whether the data type is bool
|
||||||
|
max_value (float): Maximum value in tensor's elements
|
||||||
|
min_value (float): Minimum value in tensor's elements
|
||||||
|
avg_value (float): Average value of all tensor's elements
|
||||||
|
count (int): Number of elements in tensor
|
||||||
|
neg_zero_count (int): Number of negative elements in tensor
|
||||||
|
pos_zero_count (int): Number of positive elements in tensor
|
||||||
|
nan_cout (int): Number of nan elements in tensor
|
||||||
|
neg_inf_count (int): Number of negative infinity elements in tensor
|
||||||
|
pos_inf_count (int): Number of positive infinity elements in tensor
|
||||||
|
zero_count (int): Total number of zero elements in tensor
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
>>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
|
||||||
|
>>> tensor_stat_data = dbg_services.TensorStatData
|
||||||
|
>>> (data_size=4,
|
||||||
|
>>> dtype=0,
|
||||||
|
>>> shape=[2, 2], is_bool = false, max_value = 10.0,
|
||||||
|
>>> min_value = 0.0, avg_value = 5.0,
|
||||||
|
>>> count = 4, neg_zero_count = 0, pos_zero_count = 4, nan_count = 0,
|
||||||
|
>>> neg_inf_count, pos_inf_count, zero_count = 1)
|
||||||
|
"""
|
||||||
|
@check_tensor_stat_data_init
|
||||||
|
def __init__(self, data_size, dtype, shape, is_bool, max_value, min_value, avg_value, count,
|
||||||
|
neg_zero_count, pos_zero_count, nan_count, neg_inf_count, pos_inf_count, zero_count):
|
||||||
|
self.instance = cds.TensorStatData(data_size, dtype, shape, is_bool, max_value,
|
||||||
|
min_value, avg_value, count, neg_zero_count,
|
||||||
|
pos_zero_count, nan_count, neg_inf_count,
|
||||||
|
pos_inf_count, zero_count)
|
||||||
|
|
||||||
|
|
||||||
|
    @property
    def data_size(self):
        """
        Function to receive TensorStatData data_size.

        Returns:
            int, data_size of TensorStatData instance.

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> tensor_stat_data = dbg_services.TensorStatData(data_size=4, dtype=0, shape=[2, 2],
            ...                                                is_bool=False, max_value=10.0, min_value=0.0,
            ...                                                avg_value=5.0, count=4, neg_zero_count=0,
            ...                                                pos_zero_count=4, nan_count=0, neg_inf_count=0,
            ...                                                pos_inf_count=0, zero_count=1)
            >>> data_size = tensor_stat_data.data_size
        """

        return self.instance.data_size()

    @property
    def dtype(self):
        """
        Function to receive TensorStatData dtype.

        Returns:
            int, dtype of TensorStatData instance.

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> tensor_stat_data = dbg_services.TensorStatData(data_size=4, dtype=0, shape=[2, 2],
            ...                                                is_bool=False, max_value=10.0, min_value=0.0,
            ...                                                avg_value=5.0, count=4, neg_zero_count=0,
            ...                                                pos_zero_count=4, nan_count=0, neg_inf_count=0,
            ...                                                pos_inf_count=0, zero_count=1)
            >>> dtype = tensor_stat_data.dtype
        """

        return self.instance.dtype()

    @property
    def shape(self):
        """
        Function to receive TensorStatData shape.

        Returns:
            list, shape of TensorStatData instance.

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> tensor_stat_data = dbg_services.TensorStatData(data_size=4, dtype=0, shape=[2, 2],
            ...                                                is_bool=False, max_value=10.0, min_value=0.0,
            ...                                                avg_value=5.0, count=4, neg_zero_count=0,
            ...                                                pos_zero_count=4, nan_count=0, neg_inf_count=0,
            ...                                                pos_inf_count=0, zero_count=1)
            >>> shape = tensor_stat_data.shape
        """

        return self.instance.shape()

    @property
    def is_bool(self):
        """
        Function to receive TensorStatData is_bool.

        Returns:
            bool, whether the tensor elements are bool.

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> tensor_stat_data = dbg_services.TensorStatData(data_size=4, dtype=0, shape=[2, 2],
            ...                                                is_bool=False, max_value=10.0, min_value=0.0,
            ...                                                avg_value=5.0, count=4, neg_zero_count=0,
            ...                                                pos_zero_count=4, nan_count=0, neg_inf_count=0,
            ...                                                pos_inf_count=0, zero_count=1)
            >>> is_bool = tensor_stat_data.is_bool
        """

        return self.instance.is_bool()
    @property
    def max_value(self):
        """
        Function to receive TensorStatData max_value.

        Returns:
            float, max_value of TensorStatData instance.

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> tensor_stat_data = dbg_services.TensorStatData(data_size=4, dtype=0, shape=[2, 2],
            ...                                                is_bool=False, max_value=10.0, min_value=0.0,
            ...                                                avg_value=5.0, count=4, neg_zero_count=0,
            ...                                                pos_zero_count=4, nan_count=0, neg_inf_count=0,
            ...                                                pos_inf_count=0, zero_count=1)
            >>> max_value = tensor_stat_data.max_value
        """

        return self.instance.max_value()

    @property
    def min_value(self):
        """
        Function to receive TensorStatData min_value.

        Returns:
            float, min_value of TensorStatData instance.

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> tensor_stat_data = dbg_services.TensorStatData(data_size=4, dtype=0, shape=[2, 2],
            ...                                                is_bool=False, max_value=10.0, min_value=0.0,
            ...                                                avg_value=5.0, count=4, neg_zero_count=0,
            ...                                                pos_zero_count=4, nan_count=0, neg_inf_count=0,
            ...                                                pos_inf_count=0, zero_count=1)
            >>> min_value = tensor_stat_data.min_value
        """

        return self.instance.min_value()

    @property
    def avg_value(self):
        """
        Function to receive TensorStatData avg_value.

        Returns:
            float, avg_value of TensorStatData instance.

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> tensor_stat_data = dbg_services.TensorStatData(data_size=4, dtype=0, shape=[2, 2],
            ...                                                is_bool=False, max_value=10.0, min_value=0.0,
            ...                                                avg_value=5.0, count=4, neg_zero_count=0,
            ...                                                pos_zero_count=4, nan_count=0, neg_inf_count=0,
            ...                                                pos_inf_count=0, zero_count=1)
            >>> avg_value = tensor_stat_data.avg_value
        """

        return self.instance.avg_value()

    @property
    def count(self):
        """
        Function to receive TensorStatData count.

        Returns:
            int, count of TensorStatData instance.

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> tensor_stat_data = dbg_services.TensorStatData(data_size=4, dtype=0, shape=[2, 2],
            ...                                                is_bool=False, max_value=10.0, min_value=0.0,
            ...                                                avg_value=5.0, count=4, neg_zero_count=0,
            ...                                                pos_zero_count=4, nan_count=0, neg_inf_count=0,
            ...                                                pos_inf_count=0, zero_count=1)
            >>> count = tensor_stat_data.count
        """

        return self.instance.count()
    @property
    def neg_zero_count(self):
        """
        Function to receive TensorStatData neg_zero_count.

        Returns:
            int, neg_zero_count of TensorStatData instance.

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> tensor_stat_data = dbg_services.TensorStatData(data_size=4, dtype=0, shape=[2, 2],
            ...                                                is_bool=False, max_value=10.0, min_value=0.0,
            ...                                                avg_value=5.0, count=4, neg_zero_count=0,
            ...                                                pos_zero_count=4, nan_count=0, neg_inf_count=0,
            ...                                                pos_inf_count=0, zero_count=1)
            >>> neg_zero_count = tensor_stat_data.neg_zero_count
        """

        return self.instance.neg_zero_count()

    @property
    def pos_zero_count(self):
        """
        Function to receive TensorStatData pos_zero_count.

        Returns:
            int, pos_zero_count of TensorStatData instance.

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> tensor_stat_data = dbg_services.TensorStatData(data_size=4, dtype=0, shape=[2, 2],
            ...                                                is_bool=False, max_value=10.0, min_value=0.0,
            ...                                                avg_value=5.0, count=4, neg_zero_count=0,
            ...                                                pos_zero_count=4, nan_count=0, neg_inf_count=0,
            ...                                                pos_inf_count=0, zero_count=1)
            >>> pos_zero_count = tensor_stat_data.pos_zero_count
        """

        return self.instance.pos_zero_count()

    @property
    def zero_count(self):
        """
        Function to receive TensorStatData zero_count.

        Returns:
            int, zero_count of TensorStatData instance.

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> tensor_stat_data = dbg_services.TensorStatData(data_size=4, dtype=0, shape=[2, 2],
            ...                                                is_bool=False, max_value=10.0, min_value=0.0,
            ...                                                avg_value=5.0, count=4, neg_zero_count=0,
            ...                                                pos_zero_count=4, nan_count=0, neg_inf_count=0,
            ...                                                pos_inf_count=0, zero_count=1)
            >>> zero_count = tensor_stat_data.zero_count
        """

        return self.instance.zero_count()

    @property
    def nan_count(self):
        """
        Function to receive TensorStatData nan_count.

        Returns:
            int, nan_count of TensorStatData instance.

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> tensor_stat_data = dbg_services.TensorStatData(data_size=4, dtype=0, shape=[2, 2],
            ...                                                is_bool=False, max_value=10.0, min_value=0.0,
            ...                                                avg_value=5.0, count=4, neg_zero_count=0,
            ...                                                pos_zero_count=4, nan_count=0, neg_inf_count=0,
            ...                                                pos_inf_count=0, zero_count=1)
            >>> nan_count = tensor_stat_data.nan_count
        """

        return self.instance.nan_count()
    @property
    def neg_inf_count(self):
        """
        Function to receive TensorStatData neg_inf_count.

        Returns:
            int, neg_inf_count of TensorStatData instance.

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> tensor_stat_data = dbg_services.TensorStatData(data_size=4, dtype=0, shape=[2, 2],
            ...                                                is_bool=False, max_value=10.0, min_value=0.0,
            ...                                                avg_value=5.0, count=4, neg_zero_count=0,
            ...                                                pos_zero_count=4, nan_count=0, neg_inf_count=0,
            ...                                                pos_inf_count=0, zero_count=1)
            >>> neg_inf_count = tensor_stat_data.neg_inf_count
        """

        return self.instance.neg_inf_count()

    @property
    def pos_inf_count(self):
        """
        Function to receive TensorStatData pos_inf_count.

        Returns:
            int, pos_inf_count of TensorStatData instance.

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> tensor_stat_data = dbg_services.TensorStatData(data_size=4, dtype=0, shape=[2, 2],
            ...                                                is_bool=False, max_value=10.0, min_value=0.0,
            ...                                                avg_value=5.0, count=4, neg_zero_count=0,
            ...                                                pos_zero_count=4, nan_count=0, neg_inf_count=0,
            ...                                                pos_inf_count=0, zero_count=1)
            >>> pos_inf_count = tensor_stat_data.pos_inf_count
        """

        return self.instance.pos_inf_count()
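Because every statistic above is exposed as a read-only property, a caller can collect them generically instead of reading each one by hand. The sketch below is only an illustration: the STAT_FIELDS tuple, the stat_to_dict helper, and the SimpleNamespace stand-in are not part of dbg_services, but the attribute names mirror the documented properties.

from types import SimpleNamespace

# Property names exposed by TensorStatData, as documented above.
STAT_FIELDS = ("data_size", "dtype", "shape", "is_bool", "max_value", "min_value",
               "avg_value", "count", "neg_zero_count", "pos_zero_count",
               "nan_count", "neg_inf_count", "pos_inf_count", "zero_count")


def stat_to_dict(tensor_stat_data):
    """Collect every statistic into a plain dict, e.g. for logging or JSON export."""
    return {name: getattr(tensor_stat_data, name) for name in STAT_FIELDS}


# Stand-in object for illustration only; a real TensorStatData returned by
# read_tensor_stats() exposes the same attribute names.
fake_stat = SimpleNamespace(**{name: 0 for name in STAT_FIELDS})
print(stat_to_dict(fake_stat))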
class WatchpointHit():
    """
    WatchpointHit class.

@@ -583,7 +1063,7 @@ class WatchpointHit():
            >>> name = watchpoint_hit.name
        """

-        return self.instance.get_name()
+        return self.instance.name()

    @property
    def slot(self):

@@ -606,7 +1086,7 @@ class WatchpointHit():
            >>> slot = watchpoint_hit.slot
        """

-        return self.instance.get_slot()
+        return self.instance.slot()

    @property
    def condition(self):

@@ -629,7 +1109,7 @@ class WatchpointHit():
            >>> condition = watchpoint_hit.condition
        """

-        return self.instance.get_condition()
+        return self.instance.condition()

    @property
    def watchpoint_id(self):

@@ -652,7 +1132,7 @@ class WatchpointHit():
            >>> watchpoint_id = watchpoint_hit.watchpoint_id
        """

-        return self.instance.get_watchpoint_id()
+        return self.instance.watchpoint_id()

    @property
    def parameters(self):

@@ -675,7 +1155,7 @@ class WatchpointHit():
            >>> parameters = watchpoint_hit.parameters
        """

-        params = self.instance.get_parameters()
+        params = self.instance.parameters()
        param_list = []
        for elem in params:
            tmp = Parameter(elem.get_name(),
@@ -121,7 +121,7 @@ def check_check_watchpoints(method):
    return new_method


-def check_read_tensors(method):
+def check_read_tensor_info(method):
    """Wrapper method to check the parameters of DbgServices ReadTensors."""

    @wraps(method)

@@ -189,6 +189,52 @@ def check_tensor_data_init(method):
    return new_method


def check_tensor_base_data_init(method):
    """Wrapper method to check the parameters of DbgServices TensorBaseData init."""

    @wraps(method)
    def new_method(self, *args, **kwargs):
        [data_size, dtype, shape], _ = parse_user_args(method, *args, **kwargs)

        check_uint64(data_size, "data_size")
        type_check(dtype, (int,), "dtype")
        shape_names = ["shape_{0}".format(i) for i in range(len(shape))]
        type_check_list(shape, (int,), shape_names)

        return method(self, *args, **kwargs)

    return new_method


def check_tensor_stat_data_init(method):
    """Wrapper method to check the parameters of DbgServices TensorStatData init."""

    @wraps(method)
    def new_method(self, *args, **kwargs):
        [data_size, dtype, shape, is_bool, max_value, min_value,
         avg_value, count, neg_zero_count, pos_zero_count,
         nan_count, neg_inf_count, pos_inf_count,
         zero_count], _ = parse_user_args(method, *args, **kwargs)

        check_uint64(data_size, "data_size")
        type_check(dtype, (int,), "dtype")
        shape_names = ["shape_{0}".format(i) for i in range(len(shape))]
        type_check_list(shape, (int,), shape_names)
        type_check(is_bool, (bool,), "is_bool")
        type_check(max_value, (float,), "max_value")
        type_check(min_value, (float,), "min_value")
        type_check(avg_value, (float,), "avg_value")
        type_check(count, (int,), "count")
        type_check(neg_zero_count, (int,), "neg_zero_count")
        type_check(pos_zero_count, (int,), "pos_zero_count")
        type_check(nan_count, (int,), "nan_count")
        type_check(neg_inf_count, (int,), "neg_inf_count")
        type_check(pos_inf_count, (int,), "pos_inf_count")
        type_check(zero_count, (int,), "zero_count")

        return method(self, *args, **kwargs)

    return new_method


def check_watchpoint_hit_init(method):
    """Wrapper method to check the parameters of DbgServices WatchpointHit init."""
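Both validators follow the same decorator pattern: unpack the caller's arguments, type-check each one, and only then forward the call unchanged. A minimal self-contained sketch of that pattern is shown below; it uses plain isinstance checks instead of MindSpore's parse_user_args/type_check helpers, and the check_stat_args decorator and StatHolder class are purely hypothetical.

from functools import wraps


def check_stat_args(method):
    """Validate the arguments of a statistics initializer before forwarding the call."""

    @wraps(method)
    def new_method(self, data_size, dtype, shape, **kwargs):
        # Reject anything that is not the expected primitive type.
        if not isinstance(data_size, int) or data_size < 0:
            raise TypeError("data_size must be a non-negative int")
        if not isinstance(dtype, int):
            raise TypeError("dtype must be an int")
        if not (isinstance(shape, list) and all(isinstance(dim, int) for dim in shape)):
            raise TypeError("shape must be a list of int")
        return method(self, data_size, dtype, shape, **kwargs)

    return new_method


class StatHolder:
    """Hypothetical stand-in for a class such as TensorStatData."""

    @check_stat_args
    def __init__(self, data_size, dtype, shape, **kwargs):
        self.data_size, self.dtype, self.shape = data_size, dtype, shape


holder = StatHolder(data_size=4, dtype=0, shape=[2, 2])  # passes validation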
@@ -0,0 +1,87 @@
-----------------------------------------------------------
tensor_info_1 attributes:
node name = Default/Add-op4
slot = 0
iteration = 0
rank_id = 0
root_graph_id = 0
is_output = True

tensor_base_info:
size in bytes = 24
debugger dtype = 11
shape = [2, 3]

tensor_stat_info:
size in bytes = 24
debugger dtype = 11
shape = [2, 3]
is_bool = False
max_value = 10.0
min_value = -11.0
avg_value = 0.880000114440918
count = 6
neg_zero_count = 2
pos_zero_count = 3
nan_count = 0
neg_inf_count = 0
pos_inf_count = 0
zero_count = 1
-----------------------------------------------------------
tensor_info_2 attributes:
node name = Default/Reciprocal-op3
slot = 0
iteration = 0
rank_id = 0
root_graph_id = 0
is_output = True

tensor_base_info:
size in bytes = 40
debugger dtype = 11
shape = [2, 5]

tensor_stat_info:
size in bytes = 40
debugger dtype = 11
shape = [2, 5]
is_bool = False
max_value = 1.0
min_value = 1.0
avg_value = 1.0
count = 10
neg_zero_count = 0
pos_zero_count = 2
nan_count = 0
neg_inf_count = 3
pos_inf_count = 5
zero_count = 0
-----------------------------------------------------------
tensor_info_3 attributes:
node name = Default/network-WithLossCell/_backbone-MockModel/ReduceMean-op92
slot = 0
iteration = 0
rank_id = 0
root_graph_id = 0
is_output = True

tensor_base_info:
size in bytes = 20
debugger dtype = 11
shape = [5]

tensor_stat_info:
size in bytes = 20
debugger dtype = 11
shape = [5]
is_bool = False
max_value = 1.9901361465454102
min_value = -2.175431728363037
avg_value = -0.6648297309875488
count = 5
neg_zero_count = 2
pos_zero_count = 1
nan_count = 2
neg_inf_count = 0
pos_inf_count = 0
zero_count = 0
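As a cross-check, the statistics in the golden file above can be reproduced from the raw tensor values used by the test script that follows. The NumPy sketch below is only illustrative; it assumes, consistently with the expected values, that NaN and infinite elements are excluded from max/min/avg and from the sign counts, which is not necessarily how the debugger computes them internally.

import numpy as np

# Same values as nan_tensor in the test below (Default/.../ReduceMean-op92).
nan_tensor = np.array([-2.1754317, 1.9901361, np.nan, np.nan, -1.8091936], np.float32)

finite = nan_tensor[np.isfinite(nan_tensor)]  # drop NaN/Inf before the value statistics
stats = {
    "count": int(nan_tensor.size),                        # 5
    "max_value": float(finite.max()),                     # ~1.9901361
    "min_value": float(finite.min()),                     # ~-2.1754317
    "avg_value": float(finite.mean()),                    # ~-0.6648297
    "neg_zero_count": int((finite < 0).sum()),            # 2
    "pos_zero_count": int((finite > 0).sum()),            # 1
    "zero_count": int((finite == 0).sum()),               # 0
    "nan_count": int(np.isnan(nan_tensor).sum()),         # 2
    "neg_inf_count": int(np.isneginf(nan_tensor).sum()),  # 0
    "pos_inf_count": int(np.isposinf(nan_tensor).sum()),  # 0
}
print(stats)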
@@ -0,0 +1,146 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Read tensor base and statistics test script for offline debugger APIs.
"""

import tempfile
import os
import shutil
import numpy as np
import mindspore.offline_debug.dbg_services as d
from dump_test_utils import compare_actual_with_expected

GENERATE_GOLDEN = False
test_name = "sync_read_tensors_base_stat"


def test_sync_read_tensors_base_stat():
    value_tensor = np.array([[7.5, 8.56, -9.78], [10.0, -11.0, 0.0]], np.float32)
    inf_tensor = np.array([[1., -np.inf, np.inf, -np.inf, np.inf], [np.inf, 1., -np.inf, np.inf, np.inf]], np.float32)
    nan_tensor = np.array([-2.1754317, 1.9901361, np.nan, np.nan, -1.8091936], np.float32)

    value_path = build_dump_file_structure(value_tensor, "Add", "Add.Add-op4.0.0.")
    inf_path = build_dump_file_structure(inf_tensor, "Inf", "Reciprocal.Reciprocal-op3.0.0.")
    nan_path = build_dump_file_structure(nan_tensor, "Nan", "ReduceMean.ReduceMean-op92.0.0.")

    debugger_backend = d.DbgServices(
        dump_file_path=value_path, verbose=True)
    _ = debugger_backend.initialize(
        net_name="Add", is_sync_mode=True)

    debugger_backend_2 = d.DbgServices(
        dump_file_path=inf_path, verbose=True)
    _ = debugger_backend_2.initialize(
        net_name="Inf", is_sync_mode=True)

    debugger_backend_3 = d.DbgServices(
        dump_file_path=nan_path, verbose=True)
    _ = debugger_backend_3.initialize(
        net_name="Nan", is_sync_mode=True)

    info1 = d.TensorInfo(node_name="Default/Add-op4",
                         slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=True)
    info2 = d.TensorInfo(node_name="Default/Reciprocal-op3",
                         slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=True)
    info3 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-MockModel/ReduceMean-op92",
                         slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=True)

    tensor_info_1 = [info1]
    tensor_info_2 = [info2]
    tensor_info_3 = [info3]
    tensor_info = [info1, info2, info3]

    tensor_base_data_list = debugger_backend.read_tensor_base(tensor_info_1)
    tensor_base_data_list_2 = debugger_backend_2.read_tensor_base(tensor_info_2)
    tensor_base_data_list.extend(tensor_base_data_list_2)
    tensor_base_data_list_3 = debugger_backend_3.read_tensor_base(tensor_info_3)
    tensor_base_data_list.extend(tensor_base_data_list_3)

    tensor_stat_data_list = debugger_backend.read_tensor_stats(tensor_info_1)
    tensor_stat_data_list_2 = debugger_backend_2.read_tensor_stats(tensor_info_2)
    tensor_stat_data_list.extend(tensor_stat_data_list_2)
    tensor_stat_data_list_3 = debugger_backend_3.read_tensor_stats(tensor_info_3)
    tensor_stat_data_list.extend(tensor_stat_data_list_3)

    shutil.rmtree(value_path)
    shutil.rmtree(inf_path)
    shutil.rmtree(nan_path)
    print_read_tensors(tensor_info, tensor_base_data_list, tensor_stat_data_list)
    assert compare_actual_with_expected(test_name)


def build_dump_file_structure(tensor_array, net_name, tensor_name):
    debugger_temp_dir = tempfile.mkdtemp(prefix=net_name, dir="./")
    print(debugger_temp_dir)
    path = os.path.join(debugger_temp_dir, "rank_0", net_name, "0", "0")
    print(path)
    os.makedirs(path, exist_ok=True)
    file = tempfile.mkstemp(prefix=tensor_name, suffix=".output.0.DefaultFormat.npy", dir=path)
    full_path = file[1]
    np.save(full_path, tensor_array)

    return debugger_temp_dir


def print_read_tensors(tensor_info, tensor_base_data_list, tensor_stat_data_list):
    """Print read tensors info."""
    if GENERATE_GOLDEN:
        f_write = open(test_name + ".expected", "w")
    else:
        f_write = open(test_name + ".actual", "w")

    for x, _ in enumerate(tensor_info):
        f_write.write(
            "-----------------------------------------------------------\n")
        f_write.write("tensor_info_" + str(x+1) + " attributes:\n")
        f_write.write("node name = " + tensor_info[x].node_name + "\n")
        f_write.write("slot = " + str(tensor_info[x].slot) + "\n")
        f_write.write("iteration = " + str(tensor_info[x].iteration) + "\n")
        f_write.write("rank_id = " + str(tensor_info[x].rank_id) + "\n")
        f_write.write("root_graph_id = " +
                      str(tensor_info[x].root_graph_id) + "\n")
        f_write.write("is_output = " +
                      str(tensor_info[x].is_output) + "\n")
        f_write.write("\n")
        f_write.write("tensor_base_info:\n")
        f_write.write("size in bytes = " +
                      str(tensor_base_data_list[x].data_size) + "\n")
        f_write.write("debugger dtype = " + str(tensor_base_data_list[x].dtype) + "\n")
        f_write.write("shape = " + str(tensor_base_data_list[x].shape) + "\n")

        f_write.write("\n")
        f_write.write("tensor_stat_info:\n")
        f_write.write("size in bytes = " +
                      str(tensor_stat_data_list[x].data_size) + "\n")
        f_write.write("debugger dtype = " + str(tensor_stat_data_list[x].dtype) + "\n")
        f_write.write("shape = " + str(tensor_stat_data_list[x].shape) + "\n")
        f_write.write("is_bool = " + str(tensor_stat_data_list[x].is_bool) + "\n")
        f_write.write("max_value = " + str(tensor_stat_data_list[x].max_value) + "\n")
        f_write.write("min_value = " + str(tensor_stat_data_list[x].min_value) + "\n")
        f_write.write("avg_value = " + str(tensor_stat_data_list[x].avg_value) + "\n")
        f_write.write("count = " + str(tensor_stat_data_list[x].count) + "\n")
        f_write.write("neg_zero_count = " + str(tensor_stat_data_list[x].neg_zero_count) + "\n")
        f_write.write("pos_zero_count = " + str(tensor_stat_data_list[x].pos_zero_count) + "\n")
        f_write.write("nan_count = " + str(tensor_stat_data_list[x].nan_count) + "\n")
        f_write.write("neg_inf_count = " + str(tensor_stat_data_list[x].neg_inf_count) + "\n")
        f_write.write("pos_inf_count = " + str(tensor_stat_data_list[x].pos_inf_count) + "\n")
        f_write.write("zero_count = " + str(tensor_stat_data_list[x].zero_count) + "\n")
    f_write.close()
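For readers who only want the API flow rather than the golden-file machinery, a condensed usage sketch follows. The dump directory path and net name are hypothetical; DbgServices, initialize, TensorInfo, read_tensor_base and read_tensor_stats are the calls exercised by the test above.

import mindspore.offline_debug.dbg_services as d

# Hypothetical synchronous-dump directory laid out as <dir>/rank_0/<net_name>/<graph_id>/<iteration>/.
debugger = d.DbgServices(dump_file_path="./Add_dump", verbose=False)
debugger.initialize(net_name="Add", is_sync_mode=True)

info = d.TensorInfo(node_name="Default/Add-op4", slot=0, iteration=0,
                    rank_id=0, root_graph_id=0, is_output=True)

[base] = debugger.read_tensor_base([info])    # size, dtype and shape only
[stat] = debugger.read_tensor_stats([info])   # full statistics for the same tensor
print(base.data_size, base.dtype, base.shape)
print(stat.max_value, stat.min_value, stat.nan_count, stat.zero_count)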