forked from mindspore-Ecosystem/mindspore
!23823 Clean pclint, codex warning
Merge pull request !23823 from sabrinasun_59ee/warning
This commit is contained in:
commit
e5bdb5dd6d
|
@ -81,7 +81,7 @@ void DebugServices::AddWatchpoint(
|
|||
|
||||
void DebugServices::RemoveWatchpoint(unsigned int id) {
|
||||
std::lock_guard<std::mutex> lg(lock_);
|
||||
watchpoint_table_.erase(id);
|
||||
(void)watchpoint_table_.erase(id);
|
||||
}
|
||||
|
||||
std::unique_ptr<ITensorSummary> GetSummaryPtr(const std::shared_ptr<TensorData> &tensor,
|
||||
|
@ -262,11 +262,12 @@ void DebugServices::AddAnalyzedTensorToCache(const bool recheck, const unsigned
|
|||
}
|
||||
|
||||
void DebugServices::SetCheckWatchpointsResult(
|
||||
const int chunk_id, partitioned_names *chunk_names, partitioned_names *chunk_slots,
|
||||
partitioned_numbers *chunk_conditions, partitioned_id *chunk_watchpoint_id, partitioned_parameters *chunk_parameters,
|
||||
partitioned_error_code *chunk_error_codes, partitioned_numbers *chunk_exec_orders,
|
||||
partitioned_names *chunk_time_stamp, partitioned_id *chunk_device_id, partitioned_id *chunk_root_graph_id,
|
||||
std::vector<unsigned int> *device_id, std::vector<unsigned int> *root_graph_id, const int exec_order,
|
||||
const int chunk_id, partitioned_names *const chunk_names, partitioned_names *const chunk_slots,
|
||||
partitioned_numbers *const chunk_conditions, partitioned_id *const chunk_watchpoint_id,
|
||||
partitioned_parameters *const chunk_parameters, partitioned_error_code *const chunk_error_codes,
|
||||
partitioned_numbers *const chunk_exec_orders, partitioned_names *const chunk_time_stamp,
|
||||
partitioned_id *const chunk_device_id, partitioned_id *const chunk_root_graph_id,
|
||||
std::vector<unsigned int> *const device_id, std::vector<unsigned int> *const root_graph_id, const int exec_order,
|
||||
const std::string time_stamp, const std::string &qualified_tensor_name, const std::string &tensor_slot,
|
||||
const watchpoint_t &wp, const unsigned int device_id_val, const unsigned int root_graph_id_val,
|
||||
const std::vector<parameter_t> &parameter_list, const int32_t error_code) {
|
||||
|
@ -287,14 +288,15 @@ void DebugServices::SetCheckWatchpointsResult(
|
|||
}
|
||||
|
||||
void DebugServices::CheckWatchpointsForTensor(
|
||||
partitioned_names *chunk_names, partitioned_names *chunk_slots, partitioned_numbers *chunk_conditions,
|
||||
partitioned_id *const chunk_watchpoint_id, partitioned_parameters *chunk_parameters,
|
||||
partitioned_error_code *chunk_error_codes, const std::vector<std::string> &op_overflows,
|
||||
const std::vector<std::string> &async_file_pool, partitioned_numbers *chunk_exec_orders,
|
||||
std::vector<std::shared_ptr<TensorData>> *tensor_list, int begin, int end, int chunk_id, const bool init_dbg_suspend,
|
||||
const bool step_end, const bool recheck, partitioned_id *chunk_device_id, partitioned_id *chunk_root_graph_id,
|
||||
std::vector<uint64_t> *chunk_tensor_byte_size, partitioned_names *chunk_time_stamp,
|
||||
std::vector<unsigned int> *device_id, std::vector<unsigned int> *root_graph_id) {
|
||||
partitioned_names *const chunk_names, partitioned_names *const chunk_slots,
|
||||
partitioned_numbers *const chunk_conditions, partitioned_id *const chunk_watchpoint_id,
|
||||
partitioned_parameters *const chunk_parameters, partitioned_error_code *const chunk_error_codes,
|
||||
const std::vector<std::string> &op_overflows, const std::vector<std::string> &async_file_pool,
|
||||
partitioned_numbers *const chunk_exec_orders, std::vector<std::shared_ptr<TensorData>> *const tensor_list, int begin,
|
||||
int end, int chunk_id, const bool init_dbg_suspend, const bool step_end, const bool recheck,
|
||||
partitioned_id *const chunk_device_id, partitioned_id *const chunk_root_graph_id,
|
||||
std::vector<uint64_t> *const chunk_tensor_byte_size, partitioned_names *const chunk_time_stamp,
|
||||
std::vector<unsigned int> *const device_id, std::vector<unsigned int> *const root_graph_id) {
|
||||
for (int i = begin; i < end; i++) {
|
||||
auto &tensor = (*tensor_list)[i];
|
||||
const auto tensor_name = tensor->GetName();
|
||||
|
@ -400,15 +402,13 @@ void DebugServices::CheckWatchpointsForTensor(
|
|||
#endif
|
||||
}
|
||||
}
|
||||
void DebugServices::CheckWatchpoints(std::vector<std::string> *const name, std::vector<std::string> *const slot,
|
||||
std::vector<int> *const condition, std::vector<unsigned int> *const watchpoint_id,
|
||||
std::vector<std::vector<parameter_t>> *const parameters,
|
||||
std::vector<int32_t> *const error_codes,
|
||||
const std::vector<std::string> &op_overflows,
|
||||
const std::vector<std::string> &async_file_pool,
|
||||
std::vector<std::shared_ptr<TensorData>> *tensor_list, const bool init_dbg_suspend,
|
||||
const bool step_end, const bool recheck, std::vector<unsigned int> *device_id,
|
||||
std::vector<unsigned int> *root_graph_id) {
|
||||
void DebugServices::CheckWatchpoints(
|
||||
std::vector<std::string> *const name, std::vector<std::string> *const slot, std::vector<int> *const condition,
|
||||
std::vector<unsigned int> *const watchpoint_id, std::vector<std::vector<parameter_t>> *const parameters,
|
||||
std::vector<int32_t> *const error_codes, const std::vector<std::string> &op_overflows,
|
||||
const std::vector<std::string> &async_file_pool, std::vector<std::shared_ptr<TensorData>> *const tensor_list,
|
||||
const bool init_dbg_suspend, const bool step_end, const bool recheck, std::vector<unsigned int> *const device_id,
|
||||
std::vector<unsigned int> *const root_graph_id) {
|
||||
std::lock_guard<std::mutex> lg(lock_);
|
||||
auto t1 = std::chrono::high_resolution_clock::now();
|
||||
if (watchpoint_table_.empty()) {
|
||||
|
@ -474,15 +474,17 @@ void DebugServices::CheckWatchpoints(std::vector<std::string> *const name, std::
|
|||
}
|
||||
|
||||
void DebugServices::SortWatchpointsInfo(
|
||||
std::vector<std::future<void>> *tensor_future_vec, std::vector<int> *exec_order,
|
||||
std::vector<std::string> *time_stamps, uint64_t *tensor_list_byte_size, std::vector<std::string> *name,
|
||||
std::vector<std::string> *slot, std::vector<int> *condition, std::vector<unsigned int> *watchpoint_id,
|
||||
std::vector<std::vector<parameter_t>> *parameters, std::vector<int32_t> *error_codes, partitioned_names *chunk_names,
|
||||
partitioned_names *chunk_slots, partitioned_numbers *chunk_conditions, partitioned_id *chunk_watchpoint_id,
|
||||
partitioned_parameters *chunk_parameters, partitioned_error_code *chunk_error_codes,
|
||||
partitioned_numbers *chunk_exec_orders, partitioned_names *chunk_time_stamp,
|
||||
std::vector<uint64_t> *chunk_tensor_byte_size, partitioned_id *chunk_device_id, partitioned_id *chunk_root_graph_id,
|
||||
std::vector<unsigned int> *device_id, std::vector<unsigned int> *root_graph_id) {
|
||||
std::vector<std::future<void>> *const tensor_future_vec, std::vector<int> *const exec_order,
|
||||
std::vector<std::string> *const time_stamps, uint64_t *const tensor_list_byte_size,
|
||||
std::vector<std::string> *const name, std::vector<std::string> *const slot, std::vector<int> *const condition,
|
||||
std::vector<unsigned int> *const watchpoint_id, std::vector<std::vector<parameter_t>> *const parameters,
|
||||
std::vector<int32_t> *const error_codes, partitioned_names *const chunk_names, partitioned_names *const chunk_slots,
|
||||
partitioned_numbers *const chunk_conditions, partitioned_id *const chunk_watchpoint_id,
|
||||
partitioned_parameters *const chunk_parameters, partitioned_error_code *const chunk_error_codes,
|
||||
partitioned_numbers *const chunk_exec_orders, partitioned_names *const chunk_time_stamp,
|
||||
std::vector<uint64_t> *const chunk_tensor_byte_size, partitioned_id *const chunk_device_id,
|
||||
partitioned_id *const chunk_root_graph_id, std::vector<unsigned int> *const device_id,
|
||||
std::vector<unsigned int> *const root_graph_id) {
|
||||
for (unsigned int i = 0; i < (*tensor_future_vec).size(); i++) {
|
||||
(*tensor_future_vec)[i].wait();
|
||||
(*tensor_future_vec)[i].get();
|
||||
|
@ -492,26 +494,26 @@ void DebugServices::SortWatchpointsInfo(
|
|||
std::vector<int>::iterator iter =
|
||||
std::lower_bound(exec_order->begin(), exec_order->end(), (*chunk_exec_orders)[i][j]);
|
||||
int position = iter - exec_order->begin();
|
||||
exec_order->insert(iter, (*chunk_exec_orders)[i][j]);
|
||||
(void)exec_order->insert(iter, (*chunk_exec_orders)[i][j]);
|
||||
#endif
|
||||
#ifdef OFFLINE_DBG_MODE
|
||||
std::vector<std::string>::iterator iter =
|
||||
std::lower_bound(time_stamps->begin(), time_stamps->end(), (*chunk_time_stamp)[i][j]);
|
||||
int position = iter - time_stamps->begin();
|
||||
time_stamps->insert(iter, (*chunk_time_stamp)[i][j]);
|
||||
(void)time_stamps->insert(iter, (*chunk_time_stamp)[i][j]);
|
||||
#endif
|
||||
name->insert(name->begin() + position, (*chunk_names)[i][j]);
|
||||
slot->insert(slot->begin() + position, (*chunk_slots)[i][j]);
|
||||
condition->insert(condition->begin() + position, (*chunk_conditions)[i][j]);
|
||||
watchpoint_id->insert(watchpoint_id->begin() + position, (*chunk_watchpoint_id)[i][j]);
|
||||
(void)name->insert(name->begin() + position, (*chunk_names)[i][j]);
|
||||
(void)slot->insert(slot->begin() + position, (*chunk_slots)[i][j]);
|
||||
(void)condition->insert(condition->begin() + position, (*chunk_conditions)[i][j]);
|
||||
(void)watchpoint_id->insert(watchpoint_id->begin() + position, (*chunk_watchpoint_id)[i][j]);
|
||||
if (device_id != nullptr) {
|
||||
device_id->insert(device_id->begin() + position, (*chunk_device_id)[i][j]);
|
||||
(void)device_id->insert(device_id->begin() + position, (*chunk_device_id)[i][j]);
|
||||
}
|
||||
if (root_graph_id != nullptr) {
|
||||
root_graph_id->insert(root_graph_id->begin() + position, (*chunk_root_graph_id)[i][j]);
|
||||
(void)root_graph_id->insert(root_graph_id->begin() + position, (*chunk_root_graph_id)[i][j]);
|
||||
}
|
||||
parameters->insert(parameters->begin() + position, (*chunk_parameters)[i][j]);
|
||||
error_codes->insert(error_codes->begin() + position, (*chunk_error_codes)[i][j]);
|
||||
(void)parameters->insert(parameters->begin() + position, (*chunk_parameters)[i][j]);
|
||||
(void)error_codes->insert(error_codes->begin() + position, (*chunk_error_codes)[i][j]);
|
||||
}
|
||||
// free the memory for used vectors
|
||||
std::vector<int>().swap((*chunk_exec_orders)[i]);
|
||||
|
@ -537,8 +539,10 @@ void DebugServices::ReadTensorFromNpy(const std::string &tensor_name, const std:
|
|||
MS_LOG(INFO) << "Reading in file: " << file_path;
|
||||
infile.open(file_path.c_str(), std::ios::ate | std::ios::binary | std::ios::in);
|
||||
if (!infile.is_open()) {
|
||||
MS_LOG(ERROR) << "Failed to open file (In ReadTensorFromNpy) " << file_path << " Errno:" << errno
|
||||
<< " ErrInfo:" << strerror(errno);
|
||||
MS_LOG(ERROR) << "Failed to open file (In ReadTensorFromNpy) " << file_path << " Errno:" << errno;
|
||||
const int kMaxFilenameLength = 128;
|
||||
char err_info[kMaxFilenameLength];
|
||||
MS_LOG(ERROR) << " ErrInfo:" << strerror_r(errno, err_info, sizeof(err_info));
|
||||
return;
|
||||
}
|
||||
const int substr_len = 2;
|
||||
|
@ -547,7 +551,7 @@ void DebugServices::ReadTensorFromNpy(const std::string &tensor_name, const std:
|
|||
const int header_len_buffer_size = 2;
|
||||
const int type_offset = 10;
|
||||
// get header length
|
||||
infile.seekg(0, std::ios::beg);
|
||||
(void)infile.seekg(0, std::ios::beg);
|
||||
auto header_len_buffer = std::make_unique<std::vector<char>>(header_len_offset + header_len_buffer_size);
|
||||
if (!infile.read(header_len_buffer->data(), header_len_offset + header_len_buffer_size)) {
|
||||
MS_LOG(ERROR) << "Failed to parse header length from " << file_path;
|
||||
|
@ -556,7 +560,7 @@ void DebugServices::ReadTensorFromNpy(const std::string &tensor_name, const std:
|
|||
uint16_t header_len = *reinterpret_cast<uint16_t *>(header_len_buffer->data() + header_len_offset);
|
||||
header_len_buffer.reset();
|
||||
// read in header
|
||||
infile.seekg(0, std::ios::beg);
|
||||
(void)infile.seekg(0, std::ios::beg);
|
||||
auto header_buffer = std::make_unique<std::vector<char>>(header_len_offset + header_len);
|
||||
if (!infile.read(header_buffer->data(), header_len_offset + header_len)) {
|
||||
MS_LOG(ERROR) << "Failed to read header from " << file_path;
|
||||
|
@ -591,7 +595,7 @@ void DebugServices::ReadTensorFromNpy(const std::string &tensor_name, const std:
|
|||
MS_LOG(ERROR) << "No enough memory available for loading " << tensor_name << " into host memory.";
|
||||
*no_mem_to_read = true;
|
||||
} else {
|
||||
infile.seekg(header_len + type_offset);
|
||||
(void)infile.seekg(header_len + type_offset);
|
||||
*data_buffer = new std::vector<char>(data_size);
|
||||
if (data_buffer == nullptr || !infile.read((*data_buffer)->data(), data_size)) {
|
||||
MS_LOG(ERROR) << "Unable to get tensor data from npy";
|
||||
|
@ -691,7 +695,7 @@ void ReplaceSrcFileName(std::string *dump_style_name) {
|
|||
std::string::size_type dstlen = strdst.size();
|
||||
|
||||
while ((pos = dump_style_name->find(strsrc, pos)) != std::string::npos) {
|
||||
dump_style_name->replace(pos, srclen, strdst);
|
||||
(void)dump_style_name->replace(pos, srclen, strdst);
|
||||
pos += dstlen;
|
||||
}
|
||||
}
|
||||
|
@ -857,7 +861,7 @@ void DebugServices::AddToTensorData(const std::string &backend_name, const std::
|
|||
tensor_data->SetShape(shape);
|
||||
tensor_data->SetTimeStamp(time_stamp);
|
||||
if (data_size) {
|
||||
tensor_loader_->LoadNewTensor(tensor_data, false);
|
||||
(void)tensor_loader_->LoadNewTensor(tensor_data, false);
|
||||
}
|
||||
|
||||
// add to result_list
|
||||
|
@ -969,6 +973,7 @@ void DebugServices::ReadDumpedTensorSync(const std::string &prefix_dump_file_nam
|
|||
found_file = true;
|
||||
}
|
||||
}
|
||||
(void)closedir(d);
|
||||
}
|
||||
|
||||
if (found_file) {
|
||||
|
@ -986,7 +991,6 @@ void DebugServices::ReadDumpedTensorSync(const std::string &prefix_dump_file_nam
|
|||
buffer, result_list);
|
||||
MS_LOG(INFO) << "Target tensor has not been found.";
|
||||
}
|
||||
(void)closedir(d);
|
||||
}
|
||||
|
||||
void DebugServices::ReadDumpedTensorAsync(const std::string &specific_dump_dir, const std::string &prefix_dump_to_check,
|
||||
|
@ -1327,8 +1331,10 @@ bool DebugServices::CheckOpOverflow(std::string node_name_to_find, unsigned int
|
|||
std::ifstream infile;
|
||||
infile.open(file_path.c_str(), std::ios::ate | std::ios::binary | std::ios::in);
|
||||
if (!infile.is_open()) {
|
||||
MS_LOG(ERROR) << "Failed to open overflow bin file " << file_name << " Errno:" << errno
|
||||
<< " ErrInfo:" << strerror(errno);
|
||||
MS_LOG(ERROR) << "Failed to open overflow bin file " << file_name << " Errno:" << errno;
|
||||
const int kMaxFilenameLength = 128;
|
||||
char err_info[kMaxFilenameLength];
|
||||
MS_LOG(ERROR) << " ErrInfo:" << strerror_r(errno, err_info, sizeof(err_info));
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
|
@ -191,7 +191,7 @@ void Debugger::EnableDebugger() {
|
|||
// initialize grpc client
|
||||
grpc_client_ = std::make_unique<GrpcClient>(host, port);
|
||||
// initialize sending heartbeat
|
||||
heartbeat_thread_ = std::make_unique<std::thread>([=]() { SendHeartbeat(heartbeat_period_second); });
|
||||
heartbeat_thread_ = std::make_unique<std::thread>([this]() { SendHeartbeat(heartbeat_period_second); });
|
||||
}
|
||||
debug_services_ = std::make_unique<DebugServices>();
|
||||
}
|
||||
|
@ -920,7 +920,8 @@ void AddTensorProtoInfo(TensorProto *tensor_item, const TensorProto &tensor) {
|
|||
tensor_item->clear_dims();
|
||||
}
|
||||
|
||||
void AddTensorStatInfo(const DebugServices::TensorStat &tensor_stat, std::list<TensorSummary> *tensor_summary_list) {
|
||||
void AddTensorStatInfo(const DebugServices::TensorStat &tensor_stat,
|
||||
std::list<TensorSummary> *const tensor_summary_list) {
|
||||
if (tensor_summary_list == nullptr) {
|
||||
MS_LOG(DEBUG) << "tensor_summary_list is nullptr.";
|
||||
return;
|
||||
|
@ -928,7 +929,7 @@ void AddTensorStatInfo(const DebugServices::TensorStat &tensor_stat, std::list<T
|
|||
TensorSummary tensor_summary_item;
|
||||
TensorBase *tensor_base = tensor_summary_item.mutable_tensor_base();
|
||||
tensor_base->set_data_type(tensor_stat.dtype);
|
||||
tensor_base->set_data_size(tensor_stat.data_size);
|
||||
tensor_base->set_data_size((int64_t)tensor_stat.data_size);
|
||||
for (auto elem : tensor_stat.shape) {
|
||||
tensor_base->add_shape(elem);
|
||||
}
|
||||
|
@ -1043,8 +1044,8 @@ std::list<TensorBase> Debugger::LoadTensorsBase(const ProtoVector<TensorProto> &
|
|||
}
|
||||
// tensor was found creating tensor base object.
|
||||
TensorBase tensor_base_item;
|
||||
tensor_base_item.set_data_size(tensor->GetByteSize());
|
||||
tensor_base_item.set_data_type(tensor->GetType());
|
||||
tensor_base_item.set_data_size((int64_t)tensor->GetByteSize());
|
||||
tensor_base_item.set_data_type((int64_t)tensor->GetType());
|
||||
for (auto elem : tensor->GetShape()) {
|
||||
tensor_base_item.add_shape(elem);
|
||||
}
|
||||
|
|
|
@ -42,11 +42,12 @@ std::vector<int> CheckRealOutput(const std::string &node_name, const size_t &out
|
|||
// can add the filter list for more operators here....
|
||||
if (node_name == "BatchNorm") {
|
||||
MS_LOG(INFO) << "loading node named " << node_name;
|
||||
real_outputs.insert(real_outputs.end(), {0, 3, 4});
|
||||
(void)real_outputs.insert(real_outputs.end(), {0, 3, 4});
|
||||
} else {
|
||||
// by default, TensorLoader will load all outputs
|
||||
for (size_t j = 0; j < output_size; ++j) {
|
||||
real_outputs.push_back(j);
|
||||
size_t index = j;
|
||||
real_outputs.push_back(index);
|
||||
}
|
||||
}
|
||||
return real_outputs;
|
||||
|
@ -89,7 +90,7 @@ void LoadOutputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, ui
|
|||
|
||||
for (int j : real_outputs) {
|
||||
auto addr = kernel_outputs[j];
|
||||
auto type = AnfAlgo::GetOutputInferDataType(cnode, j);
|
||||
auto type = AnfAlgo::GetOutputInferDataType(cnode, (size_t)j);
|
||||
// For example, this happens with the Depend op
|
||||
if (type == kMetaTypeNone) {
|
||||
continue;
|
||||
|
|
|
@ -46,12 +46,12 @@ DbgServices::~DbgServices() {
|
|||
delete debug_services_;
|
||||
}
|
||||
|
||||
std::string DbgServices::GetVersion() {
|
||||
std::string DbgServices::GetVersion() const {
|
||||
MS_LOG(INFO) << "get version is called";
|
||||
return MSVERSION;
|
||||
}
|
||||
|
||||
int32_t DbgServices::Initialize(std::string net_name, std::string dump_folder_path, bool is_sync_mode,
|
||||
int32_t DbgServices::Initialize(const std::string net_name, const std::string dump_folder_path, bool is_sync_mode,
|
||||
uint64_t max_mem_usage) {
|
||||
MS_LOG(INFO) << "cpp DbgServices initialize network name " << net_name;
|
||||
MS_LOG(INFO) << "cpp DbgServices initialize dump folder path " << dump_folder_path;
|
||||
|
@ -66,9 +66,10 @@ int32_t DbgServices::Initialize(std::string net_name, std::string dump_folder_pa
|
|||
debug_services_->SetDumpDir(dump_folder_path);
|
||||
debug_services_->SetSyncMode(is_sync_mode);
|
||||
// Set the memory ratio used by tensor cache. Leave 50% for other debugger backend usage.
|
||||
const uint64_t kMegabytesToBytes = pow(2, 20); // max_mem_usage will be bytes in unit in debugger backend.
|
||||
const uint64_t kMegabytesToBytes = 1048576; // max_mem_usage will be bytes in unit in debugger backend.
|
||||
auto cache_mem_ratio = 0.5;
|
||||
debug_services_->SetMemLimit(max_mem_usage * kMegabytesToBytes * cache_mem_ratio);
|
||||
const uint64_t memlimit = max_mem_usage * kMegabytesToBytes * cache_mem_ratio;
|
||||
debug_services_->SetMemLimit(memlimit);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -89,8 +90,9 @@ int32_t DbgServices::AddWatchpoint(
|
|||
|
||||
std::vector<std::string> rank_id_str = std::get<std::vector<std::string>>(attr_map["rank_id"]);
|
||||
std::vector<std::uint32_t> rank_id;
|
||||
std::transform(rank_id_str.begin(), rank_id_str.end(), std::back_inserter(rank_id),
|
||||
[](std::string &id_str) -> std::uint32_t { return static_cast<uint32_t>(std::stoul(id_str)); });
|
||||
(void)std::transform(
|
||||
rank_id_str.begin(), rank_id_str.end(), std::back_inserter(rank_id),
|
||||
[](std::string &id_str) -> std::uint32_t { return static_cast<uint32_t>(std::stoul(id_str)); });
|
||||
MS_LOG(INFO) << "cpp DbgServices AddWatchpoint rank_id ";
|
||||
for (auto const &i : rank_id) {
|
||||
MS_LOG(INFO) << i << " ";
|
||||
|
@ -99,7 +101,7 @@ int32_t DbgServices::AddWatchpoint(
|
|||
// std::vector<uint32_t> root_graph_id = std::get<std::vector<uint32_t>>(attr_map["root_graph_id"]);
|
||||
std::vector<std::string> root_graph_id_str = std::get<std::vector<std::string>>(attr_map["root_graph_id"]);
|
||||
std::vector<std::uint32_t> root_graph_id;
|
||||
std::transform(
|
||||
(void)std::transform(
|
||||
root_graph_id_str.begin(), root_graph_id_str.end(), std::back_inserter(root_graph_id),
|
||||
[](std::string &graph_str) -> std::uint32_t { return static_cast<uint32_t>(std::stoul(graph_str)); });
|
||||
MS_LOG(INFO) << "cpp DbgServices AddWatchpoint root_graph_id";
|
||||
|
@ -121,36 +123,37 @@ int32_t DbgServices::AddWatchpoint(
|
|||
std::vector<std::tuple<std::string, std::vector<uint32_t>>> check_node_graph_list;
|
||||
std::vector<DebugServices::parameter_t> parameter_list_backend;
|
||||
|
||||
std::transform(check_nodes.begin(), check_nodes.end(), std::back_inserter(check_node_list),
|
||||
[](auto &node) -> std::tuple<std::string, bool> {
|
||||
auto attr_map = node.second;
|
||||
return std::make_tuple(node.first, std::get<bool>(attr_map["is_output"]));
|
||||
});
|
||||
(void)std::transform(check_nodes.begin(), check_nodes.end(), std::back_inserter(check_node_list),
|
||||
[](auto &node) -> std::tuple<std::string, bool> {
|
||||
auto attr_map = node.second;
|
||||
return std::make_tuple(node.first, std::get<bool>(attr_map["is_output"]));
|
||||
});
|
||||
|
||||
std::transform(check_nodes.begin(), check_nodes.end(), std::back_inserter(check_node_device_list),
|
||||
[](auto &node) -> std::tuple<std::string, std::vector<uint32_t>> {
|
||||
auto attr_map = node.second;
|
||||
std::vector<std::string> rank_id_str = std::get<std::vector<std::string>>(attr_map["rank_id"]);
|
||||
std::vector<std::uint32_t> rank_id;
|
||||
std::transform(
|
||||
rank_id_str.begin(), rank_id_str.end(), std::back_inserter(rank_id),
|
||||
[](std::string &id_str) -> std::uint32_t { return static_cast<uint32_t>(std::stoul(id_str)); });
|
||||
return std::make_tuple(node.first, rank_id);
|
||||
});
|
||||
(void)std::transform(check_nodes.begin(), check_nodes.end(), std::back_inserter(check_node_device_list),
|
||||
[](auto &node) -> std::tuple<std::string, std::vector<uint32_t>> {
|
||||
auto attr_map = node.second;
|
||||
std::vector<std::string> rank_id_str = std::get<std::vector<std::string>>(attr_map["rank_id"]);
|
||||
std::vector<std::uint32_t> rank_id;
|
||||
(void)std::transform(rank_id_str.begin(), rank_id_str.end(), std::back_inserter(rank_id),
|
||||
[](std::string &id_str) -> std::uint32_t {
|
||||
return static_cast<uint32_t>(std::stoul(id_str));
|
||||
});
|
||||
return std::make_tuple(node.first, rank_id);
|
||||
});
|
||||
|
||||
std::transform(
|
||||
(void)std::transform(
|
||||
check_nodes.begin(), check_nodes.end(), std::back_inserter(check_node_graph_list),
|
||||
[](auto &node) -> std::tuple<std::string, std::vector<uint32_t>> {
|
||||
auto attr_map = node.second;
|
||||
std::vector<std::string> root_graph_id_str = std::get<std::vector<std::string>>(attr_map["root_graph_id"]);
|
||||
std::vector<std::uint32_t> root_graph_id;
|
||||
std::transform(
|
||||
(void)std::transform(
|
||||
root_graph_id_str.begin(), root_graph_id_str.end(), std::back_inserter(root_graph_id),
|
||||
[](std::string &graph_str) -> std::uint32_t { return static_cast<uint32_t>(std::stoul(graph_str)); });
|
||||
return std::make_tuple(node.first, root_graph_id);
|
||||
});
|
||||
|
||||
std::transform(
|
||||
(void)std::transform(
|
||||
parameter_list.begin(), parameter_list.end(), std::back_inserter(parameter_list_backend),
|
||||
[](const parameter_t &parameter) -> DebugServices::parameter_t {
|
||||
return DebugServices::parameter_t{parameter.name, parameter.disabled, parameter.value, parameter.hit};
|
||||
|
@ -221,17 +224,17 @@ std::vector<watchpoint_hit_t> DbgServices::CheckWatchpoints(unsigned int iterati
|
|||
return hits;
|
||||
}
|
||||
|
||||
std::string GetTensorFullName(tensor_info_t info) { return info.node_name + ":" + std::to_string(info.slot); }
|
||||
std::string GetTensorFullName(const tensor_info_t info) { return info.node_name + ":" + std::to_string(info.slot); }
|
||||
|
||||
unsigned int GetTensorRankId(tensor_info_t info) { return info.rank_id; }
|
||||
unsigned int GetTensorRankId(const tensor_info_t info) { return info.rank_id; }
|
||||
|
||||
unsigned int GetTensorRootGraphId(tensor_info_t info) { return info.root_graph_id; }
|
||||
unsigned int GetTensorRootGraphId(const tensor_info_t info) { return info.root_graph_id; }
|
||||
|
||||
unsigned int GetTensorIteration(tensor_info_t info) { return info.iteration; }
|
||||
unsigned int GetTensorIteration(const tensor_info_t info) { return info.iteration; }
|
||||
|
||||
unsigned int GetTensorSlot(tensor_info_t info) { return info.slot; }
|
||||
unsigned int GetTensorSlot(const tensor_info_t info) { return info.slot; }
|
||||
|
||||
bool GetTensorIsOutput(tensor_info_t info) { return info.is_output; }
|
||||
bool GetTensorIsOutput(const tensor_info_t info) { return info.is_output; }
|
||||
|
||||
std::vector<std::shared_ptr<TensorData>> DbgServices::ReadTensorsUtil(std::vector<tensor_info_t> info) {
|
||||
for (auto i : info) {
|
||||
|
@ -247,12 +250,12 @@ std::vector<std::shared_ptr<TensorData>> DbgServices::ReadTensorsUtil(std::vecto
|
|||
std::vector<std::shared_ptr<TensorData>> result_list;
|
||||
std::vector<bool> is_output;
|
||||
|
||||
std::transform(info.begin(), info.end(), std::back_inserter(backend_name), GetTensorFullName);
|
||||
std::transform(info.begin(), info.end(), std::back_inserter(slot), GetTensorSlot);
|
||||
std::transform(info.begin(), info.end(), std::back_inserter(rank_id), GetTensorRankId);
|
||||
std::transform(info.begin(), info.end(), std::back_inserter(root_graph_id), GetTensorRootGraphId);
|
||||
std::transform(info.begin(), info.end(), std::back_inserter(iteration), GetTensorIteration);
|
||||
std::transform(info.begin(), info.end(), std::back_inserter(is_output), GetTensorIsOutput);
|
||||
(void)std::transform(info.begin(), info.end(), std::back_inserter(backend_name), GetTensorFullName);
|
||||
(void)std::transform(info.begin(), info.end(), std::back_inserter(slot), GetTensorSlot);
|
||||
(void)std::transform(info.begin(), info.end(), std::back_inserter(rank_id), GetTensorRankId);
|
||||
(void)std::transform(info.begin(), info.end(), std::back_inserter(root_graph_id), GetTensorRootGraphId);
|
||||
(void)std::transform(info.begin(), info.end(), std::back_inserter(iteration), GetTensorIteration);
|
||||
(void)std::transform(info.begin(), info.end(), std::back_inserter(is_output), GetTensorIsOutput);
|
||||
|
||||
MS_LOG(INFO) << "cpp before";
|
||||
std::vector<std::string> file_paths;
|
||||
|
@ -264,9 +267,13 @@ std::vector<std::shared_ptr<TensorData>> DbgServices::ReadTensorsUtil(std::vecto
|
|||
debug_services_->ReadDumpedTensor(backend_name, slot, rank_id, iteration, root_graph_id, is_output, file_paths,
|
||||
&result_list);
|
||||
for (auto result : result_list) {
|
||||
std::string output = "0";
|
||||
if (result->GetIsOutput()) {
|
||||
output = "1";
|
||||
}
|
||||
std::string key_name_in_cache = result->GetName() + ":" + std::to_string(result->GetDeviceId()) + ":" +
|
||||
std::to_string(result->GetRootGraphId()) + ":" +
|
||||
std::to_string(result->GetIsOutput()) + ":" + std::to_string(result->GetSlot());
|
||||
std::to_string(result->GetRootGraphId()) + ":" + output + ":" +
|
||||
std::to_string(result->GetSlot());
|
||||
debug_services_->AppendToCacheEvictQueue(key_name_in_cache);
|
||||
}
|
||||
auto t2 = std::chrono::high_resolution_clock::now();
|
||||
|
@ -279,7 +286,7 @@ std::vector<std::shared_ptr<TensorData>> DbgServices::ReadTensorsUtil(std::vecto
|
|||
return result_list;
|
||||
}
|
||||
|
||||
std::vector<tensor_data_t> DbgServices::ReadTensors(std::vector<tensor_info_t> info) {
|
||||
std::vector<tensor_data_t> DbgServices::ReadTensors(const std::vector<tensor_info_t> info) {
|
||||
std::vector<tensor_data_t> tensors_read;
|
||||
std::vector<std::shared_ptr<TensorData>> result_list;
|
||||
result_list = ReadTensorsUtil(info);
|
||||
|
@ -290,7 +297,7 @@ std::vector<tensor_data_t> DbgServices::ReadTensors(std::vector<tensor_info_t> i
|
|||
return tensors_read;
|
||||
}
|
||||
|
||||
std::vector<TensorBaseData> DbgServices::ReadTensorsBase(std::vector<tensor_info_t> info) {
|
||||
std::vector<TensorBaseData> DbgServices::ReadTensorsBase(const std::vector<tensor_info_t> info) {
|
||||
std::vector<TensorBaseData> tensors_read_base;
|
||||
std::vector<std::shared_ptr<TensorData>> result_list;
|
||||
result_list = ReadTensorsUtil(info);
|
||||
|
@ -308,7 +315,7 @@ std::vector<TensorBaseData> DbgServices::ReadTensorsBase(std::vector<tensor_info
|
|||
}
|
||||
|
||||
void AddTensorStatInfo(const DebugServices::TensorStat &tensor_statistics,
|
||||
std::vector<TensorStatData> *tensors_read_stat) {
|
||||
std::vector<TensorStatData> *const tensors_read_stat) {
|
||||
if (tensors_read_stat == nullptr) {
|
||||
MS_LOG(DEBUG) << "tensors_read_stat is nullptr.";
|
||||
return;
|
||||
|
@ -321,7 +328,7 @@ void AddTensorStatInfo(const DebugServices::TensorStat &tensor_statistics,
|
|||
tensors_read_stat->push_back(tensor_data_item);
|
||||
}
|
||||
|
||||
std::vector<TensorStatData> DbgServices::ReadTensorsStat(std::vector<tensor_info_t> info) {
|
||||
std::vector<TensorStatData> DbgServices::ReadTensorsStat(const std::vector<tensor_info_t> info) {
|
||||
std::vector<TensorStatData> tensors_read_stat;
|
||||
std::vector<std::shared_ptr<TensorData>> result_list;
|
||||
result_list = ReadTensorsUtil(info);
|
||||
|
|
|
@ -194,7 +194,8 @@ class DbgServices {
|
|||
|
||||
~DbgServices();
|
||||
|
||||
int32_t Initialize(std::string net_name, std::string dump_folder_path, bool is_sync_mode, uint64_t max_mem_usage);
|
||||
int32_t Initialize(const std::string net_name, const std::string dump_folder_path, bool is_sync_mode,
|
||||
uint64_t max_mem_usage);
|
||||
|
||||
int32_t AddWatchpoint(
|
||||
unsigned int id, unsigned int watch_condition,
|
||||
|
@ -207,13 +208,13 @@ class DbgServices {
|
|||
|
||||
std::vector<std::shared_ptr<TensorData>> ReadTensorsUtil(std::vector<tensor_info_t> info);
|
||||
|
||||
std::vector<tensor_data_t> ReadTensors(std::vector<tensor_info_t> info);
|
||||
std::vector<tensor_data_t> ReadTensors(const std::vector<tensor_info_t> info);
|
||||
|
||||
std::vector<TensorBaseData> ReadTensorsBase(std::vector<tensor_info_t> info);
|
||||
std::vector<TensorBaseData> ReadTensorsBase(const std::vector<tensor_info_t> info);
|
||||
|
||||
std::vector<TensorStatData> ReadTensorsStat(std::vector<tensor_info_t> info);
|
||||
std::vector<TensorStatData> ReadTensorsStat(const std::vector<tensor_info_t> info);
|
||||
|
||||
std::string GetVersion();
|
||||
std::string GetVersion() const;
|
||||
};
|
||||
|
||||
#endif // DEBUG_DBG_SERVICES_H_
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
|
||||
PYBIND11_MODULE(_mindspore_offline_debug, m) {
|
||||
m.doc() = "pybind11 debug services api";
|
||||
py::class_<DbgServices>(m, "DbgServices")
|
||||
(void)py::class_<DbgServices>(m, "DbgServices")
|
||||
.def(py::init<bool>())
|
||||
.def("Initialize", &DbgServices::Initialize)
|
||||
.def("AddWatchpoint", &DbgServices::AddWatchpoint)
|
||||
|
@ -31,7 +31,7 @@ PYBIND11_MODULE(_mindspore_offline_debug, m) {
|
|||
.def("ReadTensorsStat", &DbgServices::ReadTensorsStat)
|
||||
.def("GetVersion", &DbgServices::GetVersion);
|
||||
|
||||
py::class_<parameter_t>(m, "parameter")
|
||||
(void)py::class_<parameter_t>(m, "parameter")
|
||||
.def(py::init<std::string, bool, double, bool, double>())
|
||||
.def("get_name", ¶meter_t::get_name)
|
||||
.def("get_disabled", ¶meter_t::get_disabled)
|
||||
|
@ -39,7 +39,7 @@ PYBIND11_MODULE(_mindspore_offline_debug, m) {
|
|||
.def("get_hit", ¶meter_t::get_hit)
|
||||
.def("get_actual_value", ¶meter_t::get_actual_value);
|
||||
|
||||
py::class_<watchpoint_hit_t>(m, "watchpoint_hit")
|
||||
(void)py::class_<watchpoint_hit_t>(m, "watchpoint_hit")
|
||||
.def(py::init<std::string, uint32_t, int, uint32_t, std::vector<parameter_t>, int32_t, uint32_t, uint32_t>())
|
||||
.def("get_name", &watchpoint_hit_t::get_name)
|
||||
.def("get_slot", &watchpoint_hit_t::get_slot)
|
||||
|
@ -50,7 +50,7 @@ PYBIND11_MODULE(_mindspore_offline_debug, m) {
|
|||
.def("get_rank_id", &watchpoint_hit_t::get_rank_id)
|
||||
.def("get_root_graph_id", &watchpoint_hit_t::get_root_graph_id);
|
||||
|
||||
py::class_<tensor_info_t>(m, "tensor_info")
|
||||
(void)py::class_<tensor_info_t>(m, "tensor_info")
|
||||
.def(py::init<std::string, uint32_t, uint32_t, uint32_t, uint32_t, bool>())
|
||||
.def("get_node_name", &tensor_info_t::get_node_name)
|
||||
.def("get_slot", &tensor_info_t::get_slot)
|
||||
|
@ -59,20 +59,20 @@ PYBIND11_MODULE(_mindspore_offline_debug, m) {
|
|||
.def("get_root_graph_id", &tensor_info_t::get_root_graph_id)
|
||||
.def("get_is_output", &tensor_info_t::get_is_output);
|
||||
|
||||
py::class_<tensor_data_t>(m, "tensor_data")
|
||||
(void)py::class_<tensor_data_t>(m, "tensor_data")
|
||||
.def(py::init<char *, uint64_t, int, std::vector<int64_t>>())
|
||||
.def("get_data_ptr", &tensor_data_t::get_data_ptr)
|
||||
.def("get_data_size", &tensor_data_t::get_data_size)
|
||||
.def("get_dtype", &tensor_data_t::get_dtype)
|
||||
.def("get_shape", &tensor_data_t::get_shape);
|
||||
|
||||
py::class_<TensorBaseData>(m, "TensorBaseData")
|
||||
(void)py::class_<TensorBaseData>(m, "TensorBaseData")
|
||||
.def(py::init<uint64_t, int, std::vector<int64_t>>())
|
||||
.def("data_size", &TensorBaseData::data_size)
|
||||
.def("dtype", &TensorBaseData::dtype)
|
||||
.def("shape", &TensorBaseData::shape);
|
||||
|
||||
py::class_<TensorStatData>(m, "TensorStatData")
|
||||
(void)py::class_<TensorStatData>(m, "TensorStatData")
|
||||
.def(
|
||||
py::init<uint64_t, int, std::vector<int64_t>, bool, double, double, double, int, int, int, int, int, int, int>())
|
||||
.def("data_size", &TensorStatData::data_size)
|
||||
|
|
|
@ -125,9 +125,15 @@ void TensorSummary<T>::SummarizeTensor(const std::vector<DebugServices::watchpoi
|
|||
MS_LOG(DEBUG) << "Current and previous tensor are not the same size.";
|
||||
}
|
||||
}
|
||||
inf_count_ += std::isinf(current_value);
|
||||
nan_count_ += std::isnan(current_value);
|
||||
zero_count_ += (current_value == 0);
|
||||
if (std::isinf(current_value)) {
|
||||
inf_count_ += 1;
|
||||
}
|
||||
if (std::isnan(current_value)) {
|
||||
nan_count_ += 1;
|
||||
}
|
||||
if (current_value == 0) {
|
||||
zero_count_ += 1;
|
||||
}
|
||||
max_ = std::max(max_, current_value);
|
||||
min_ = std::min(min_, current_value);
|
||||
if (mean_sd_cal_enabled_) {
|
||||
|
@ -140,11 +146,11 @@ void TensorSummary<T>::SummarizeTensor(const std::vector<DebugServices::watchpoi
|
|||
range_count.second->ProcessElement(current_value);
|
||||
}
|
||||
for (auto &mean : means_) {
|
||||
if (mean.first == "curr_prev_diff_mean") {
|
||||
if (mean.first.compare("curr_prev_diff_mean") == 0) {
|
||||
mean.second->ProcessElement(std::abs(current_value - previous_value));
|
||||
} else if (mean.first == "abs_prev_mean") {
|
||||
} else if (mean.first.compare("abs_prev_mean") == 0) {
|
||||
mean.second->ProcessElement(std::abs(previous_value));
|
||||
} else if (mean.first == "abs_current_mean") {
|
||||
} else if (mean.first.compare("abs_current_mean") == 0) {
|
||||
mean.second->ProcessElement(std::abs(current_value));
|
||||
}
|
||||
}
|
||||
|
@ -166,8 +172,12 @@ void TensorSummary<T>::TensorStatistics(DbgDataType dtype_value) {
|
|||
neg_inf_count_ += 1;
|
||||
}
|
||||
}
|
||||
zero_count_ += (current_value == 0);
|
||||
nan_count_ += std::isnan(current_value);
|
||||
if (current_value == 0) {
|
||||
zero_count_ += 1;
|
||||
}
|
||||
if (std::isnan(current_value)) {
|
||||
nan_count_ += 1;
|
||||
}
|
||||
if (!(std::isnan(current_value) || std::isinf(current_value))) {
|
||||
// only considering tensor elements with value
|
||||
if (std::signbit(current_value) && !(current_value == 0)) {
|
||||
|
@ -193,9 +203,9 @@ std::tuple<bool, int, std::vector<DebugServices::parameter_t>> TensorSummary<T>:
|
|||
std::bitset<bit_size> error_code;
|
||||
CONDITION_TYPE type = wp.condition.type;
|
||||
// bit 0 denotes presence of nan
|
||||
error_code.set(0, nan_count_ > 0);
|
||||
(void)error_code.set(0, nan_count_ > 0);
|
||||
// bit 1 denotes presence of inf
|
||||
error_code.set(1, inf_count_ > 0);
|
||||
(void)error_code.set(1, inf_count_ > 0);
|
||||
|
||||
if (type == CONDITION_TYPE::HAS_NAN) {
|
||||
error_code.reset();
|
||||
|
@ -319,10 +329,10 @@ void TensorSummary<T>::InitCalculators(const std::vector<DebugServices::watchpoi
|
|||
range_counts_[wp_id]->set_range_end_inclusive(wp.parameter_list[1].value);
|
||||
}
|
||||
} else if (wp.tensor_update_ratio_mean_enabled() && prev_tensor_ptr_) {
|
||||
means_.insert({"curr_prev_diff_mean", std::make_unique<MeanCalculator>()});
|
||||
means_.insert({"abs_prev_mean", std::make_unique<MeanCalculator>()});
|
||||
(void)means_.insert({"curr_prev_diff_mean", std::make_unique<MeanCalculator>()});
|
||||
(void)means_.insert({"abs_prev_mean", std::make_unique<MeanCalculator>()});
|
||||
} else if (wp.abs_mean_enabled()) {
|
||||
means_.insert({"abs_current_mean", std::make_unique<MeanCalculator>()});
|
||||
(void)means_.insert({"abs_current_mean", std::make_unique<MeanCalculator>()});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -118,6 +118,40 @@ class DbgServices():
|
|||
self.initialized = True
|
||||
return self.dbg_instance.Initialize(net_name, self.dump_file_path, is_sync_mode, max_mem_usage)
|
||||
|
||||
def transform_check_node_list(self, info_name, info_param, node_name, check_node_list):
    """
    Transforming check_node_list based on info_name and info_param.

    Only the 'rank_id' and 'root_graph_id' entries are rewritten: the wildcard '*' is stored
    as ['*'], any other value is converted element by element into a list of strings.
    For every other info_name (e.g. 'is_output') nothing is changed.
    check_node_list is modified in place and the very same object is returned.

    Args:
        info_name (str): Info name of check_node_list, either 'rank_id', 'root_graph_id' or 'is_output'.
        info_param (Union[list[int], str, bool]): Info parameters of check_node_list, mapped to info_name.
            For 'rank_id' and 'root_graph_id' this is a list of ints or '*' to select all of them.
        node_name (str): Node name as key of check_node_list.
        check_node_list (dict): Dictionary of node names (str or '*' to check all nodes) as key,
            mapping to rank_id (list of ints or '*' to check all devices),
            root_graph_id (list of ints or '*' to check all graphs) and is_output (bool).

    Returns:
        Transformed check_node_list.

    Examples:
        >>> from mindspore.offline_debug import dbg_services
        >>> d = dbg_services.DbgServices(dump_file_path="dump_file_path",
        >>>                              verbose=True)
        >>> d_init = d.initialize(is_sync_mode=True)
        >>> d_wp = d_init.transform_check_node_list(info_name="rank_id",
        >>>                                         info_param=[0],
        >>>                                         node_name="conv2.bias",
        >>>                                         check_node_list={"conv2.bias" : {"rank_id": [0],
        >>>                                                                          "root_graph_id": [0],
        >>>                                                                          "is_output": True}})
    """
    if info_name in ("rank_id", "root_graph_id"):
        if info_param == "*":
            # Wildcard: keep it as a one-element list so both branches store a list of str.
            normalized = ["*"]
        else:
            normalized = [str(item) for item in info_param]
        check_node_list[node_name][info_name] = normalized
    return check_node_list
|
||||
|
||||
@check_initialize_done
|
||||
@check_add_watchpoint
|
||||
def add_watchpoint(self, watchpoint_id, watch_condition, check_node_list, parameter_list):
|
||||
|
@ -155,11 +189,7 @@ class DbgServices():
|
|||
log("in Python AddWatchpoint")
|
||||
for node_name, node_info in check_node_list.items():
|
||||
for info_name, info_param in node_info.items():
|
||||
if info_name in ["rank_id", "root_graph_id"]:
|
||||
if info_param in ["*"]:
|
||||
check_node_list[node_name][info_name] = ["*"]
|
||||
else:
|
||||
check_node_list[node_name][info_name] = list(map(str, info_param))
|
||||
check_node_list = self.transform_check_node_list(info_name, info_param, node_name, check_node_list)
|
||||
parameter_list_inst = []
|
||||
for elem in parameter_list:
|
||||
parameter_list_inst.append(elem.instance)
|
||||
|
|
Loading…
Reference in New Issue