forked from mindspore-Ecosystem/mindspore
Offline debugger
Authors: John Tzanakakis, Adel Shafiei, Amir Lashkari, Islam Amin
This commit is contained in:
parent
3d4a1aaff1
commit
da3b13a0e1
|
@ -63,6 +63,16 @@ install(
|
|||
COMPONENT mindspore
|
||||
)
|
||||
|
||||
if(CMAKE_SYSTEM_NAME MATCHES "Windows")
|
||||
message("offline debugger does not support windows system temporarily")
|
||||
else()
|
||||
install(
|
||||
TARGETS _mindspore_offline_debug
|
||||
DESTINATION ${INSTALL_BASE_DIR}
|
||||
COMPONENT mindspore
|
||||
)
|
||||
endif()
|
||||
|
||||
install(
|
||||
TARGETS mindspore_shared_lib
|
||||
DESTINATION ${INSTALL_LIB_DIR}
|
||||
|
@ -317,6 +327,18 @@ if(EXISTS ${CMAKE_SOURCE_DIR}/mindspore/dataset)
|
|||
)
|
||||
endif()
|
||||
|
||||
if(CMAKE_SYSTEM_NAME MATCHES "Windows")
|
||||
message("offline debugger does not support windows system temporarily")
|
||||
else()
|
||||
if(EXISTS ${CMAKE_SOURCE_DIR}/mindspore/offline_debug)
|
||||
install(
|
||||
DIRECTORY ${CMAKE_SOURCE_DIR}/mindspore/offline_debug
|
||||
DESTINATION ${INSTALL_PY_DIR}
|
||||
COMPONENT mindspore
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
## Public header files
|
||||
install(
|
||||
DIRECTORY ${CMAKE_SOURCE_DIR}/include
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/debug/)
|
||||
include_directories(${CMAKE_BINARY_DIR})
|
||||
|
||||
set(_DEBUG_SRC_LIST
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/anf_ir_dump.cc"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/anf_ir_utils.cc"
|
||||
|
@ -8,6 +11,14 @@ set(_DEBUG_SRC_LIST
|
|||
"${CMAKE_CURRENT_SOURCE_DIR}/env_config_parser.cc"
|
||||
)
|
||||
|
||||
set(_OFFLINE_SRC_LIST
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/debug_services.cc"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/debugger/tensor_summary.cc"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/debugger/offline_debug/offline_logger.cc"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/debugger/offline_debug/dbg_services.cc"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/debugger/offline_debug/mi_pybind_register.cc"
|
||||
)
|
||||
|
||||
if(ENABLE_DUMP_IR)
|
||||
file(GLOB_RECURSE _RDR_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "rdr/*.cc")
|
||||
if(NOT ENABLE_D)
|
||||
|
@ -38,3 +49,13 @@ endif()
|
|||
set_property(SOURCE ${_DEBUG_SRC_LIST} ${_RDR_SRC_LIST} PROPERTY COMPILE_DEFINITIONS
|
||||
SUBMODULE_ID=mindspore::SubModuleId::SM_DEBUG)
|
||||
add_library(_mindspore_debug_obj OBJECT ${_DEBUG_SRC_LIST} ${_RDR_SRC_LIST})
|
||||
if(NOT CMAKE_SYSTEM_NAME MATCHES "Windows")
|
||||
add_compile_options(-Wall -DOFFLINE_DBG_MODE -fPIC -O2)
|
||||
set_property(SOURCE ${_OFFLINE_SRC_LIST} PROPERTY COMPILE_DEFINITIONS
|
||||
SUBMODULE_ID=mindspore::SubModuleId::SM_OFFLINE_DEBUG)
|
||||
add_library(_mindspore_offline_debug SHARED ${_OFFLINE_SRC_LIST})
|
||||
set_target_properties(_mindspore_offline_debug PROPERTIES
|
||||
PREFIX "${PYTHON_MODULE_PREFIX}"
|
||||
SUFFIX "${PYTHON_MODULE_EXTENSION}"
|
||||
)
|
||||
endif()
|
||||
|
|
|
@ -13,14 +13,19 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "debug/debug_services.h"
|
||||
#include <dirent.h>
|
||||
#include <fstream>
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <unordered_set>
|
||||
#ifdef ONLINE_DBG_MODE
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
#include "debug/debug_services.h"
|
||||
#endif
|
||||
#include "debug/debugger/tensor_summary.h"
|
||||
|
||||
#ifdef ONLINE_DBG_MODE
|
||||
namespace mindspore {
|
||||
|
||||
#endif
|
||||
DebugServices::DebugServices() {
|
||||
tensor_loader_ = new TensorLoader();
|
||||
uint32_t iter_num = -1;
|
||||
|
@ -42,9 +47,11 @@ DebugServices &DebugServices::operator=(const DebugServices &other) {
|
|||
|
||||
DebugServices::~DebugServices() { delete tensor_loader_; }
|
||||
|
||||
void DebugServices::AddWatchpoint(unsigned int id, unsigned int watch_condition, float parameter,
|
||||
const std::vector<std::tuple<std::string, bool>> &check_node_list,
|
||||
const std::vector<parameter_t> ¶meter_list) {
|
||||
void DebugServices::AddWatchpoint(
|
||||
unsigned int id, unsigned int watch_condition, float parameter,
|
||||
const std::vector<std::tuple<std::string, bool>> &check_node_list, const std::vector<parameter_t> ¶meter_list,
|
||||
const std::vector<std::tuple<std::string, std::vector<uint32_t>>> *check_node_device_list,
|
||||
const std::vector<std::tuple<std::string, std::vector<uint32_t>>> *check_node_graph_list) {
|
||||
std::lock_guard<std::mutex> lg(lock_);
|
||||
|
||||
watchpoint_t watchpoint_item;
|
||||
|
@ -52,6 +59,12 @@ void DebugServices::AddWatchpoint(unsigned int id, unsigned int watch_condition,
|
|||
watchpoint_item.condition.type = static_cast<CONDITION_TYPE>(watch_condition);
|
||||
watchpoint_item.condition.parameter = parameter;
|
||||
watchpoint_item.check_node_list = check_node_list;
|
||||
if (check_node_device_list != nullptr) {
|
||||
watchpoint_item.check_node_device_list = *check_node_device_list;
|
||||
}
|
||||
if (check_node_graph_list != nullptr) {
|
||||
watchpoint_item.check_node_graph_list = *check_node_graph_list;
|
||||
}
|
||||
watchpoint_item.parameter_list = parameter_list;
|
||||
watchpoint_table[id] = watchpoint_item;
|
||||
}
|
||||
|
@ -61,122 +74,170 @@ void DebugServices::RemoveWatchpoint(unsigned int id) {
|
|||
watchpoint_table.erase(id);
|
||||
}
|
||||
|
||||
std::unique_ptr<ITensorSummary> GetSummaryPtr(const std::shared_ptr<TensorData> &tensor, void *previous_tensor_ptr,
|
||||
uint32_t num_elements, int tensor_dtype) {
|
||||
switch (tensor_dtype) {
|
||||
case DbgDataType::DT_UINT8: {
|
||||
return std::make_unique<TensorSummary<uint8_t>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements);
|
||||
}
|
||||
case DbgDataType::DT_INT8: {
|
||||
return std::make_unique<TensorSummary<int8_t>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements);
|
||||
}
|
||||
case DbgDataType::DT_UINT16: {
|
||||
return std::make_unique<TensorSummary<uint16_t>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements);
|
||||
}
|
||||
case DbgDataType::DT_INT16: {
|
||||
return std::make_unique<TensorSummary<int16_t>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements);
|
||||
}
|
||||
case DbgDataType::DT_UINT32: {
|
||||
return std::make_unique<TensorSummary<uint32_t>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements);
|
||||
}
|
||||
case DbgDataType::DT_INT32:
|
||||
case DbgDataType::DT_BASE_INT: {
|
||||
return std::make_unique<TensorSummary<int32_t>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements);
|
||||
}
|
||||
case DbgDataType::DT_UINT64: {
|
||||
return std::make_unique<TensorSummary<uint64_t>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements);
|
||||
}
|
||||
case DbgDataType::DT_INT64: {
|
||||
return std::make_unique<TensorSummary<int64_t>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements);
|
||||
}
|
||||
case DbgDataType::DT_FLOAT16: {
|
||||
return std::make_unique<TensorSummary<float16>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements);
|
||||
}
|
||||
case DbgDataType::DT_FLOAT32:
|
||||
case DbgDataType::DT_BASE_FLOAT: {
|
||||
return std::make_unique<TensorSummary<float>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements);
|
||||
}
|
||||
case DbgDataType::DT_FLOAT64: {
|
||||
return std::make_unique<TensorSummary<double>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements);
|
||||
}
|
||||
case DbgDataType::DT_BOOL: {
|
||||
return std::make_unique<TensorSummary<bool>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements);
|
||||
}
|
||||
default:
|
||||
MS_LOG(INFO) << "Unsupported tensor type";
|
||||
// return a null pointer
|
||||
return std::unique_ptr<TensorSummary<int32_t>>{};
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef OFFLINE_DBG_MODE
|
||||
void *DebugServices::GetPrevTensor(const std::shared_ptr<TensorData> &tensor, bool previous_iter_tensor_needed) {
|
||||
void *previous_tensor_ptr = nullptr;
|
||||
std::shared_ptr<TensorData> tensor_prev;
|
||||
if (previous_iter_tensor_needed && tensor->GetIteration() > 1) {
|
||||
// read data in offline mode
|
||||
std::vector<std::shared_ptr<TensorData>> result_list_prev;
|
||||
ReadDumpedTensor(std::vector<std::string>{tensor->GetName()}, std::vector<size_t>{tensor->GetSlot()},
|
||||
std::vector<unsigned int>{tensor->GetDeviceId()},
|
||||
std::vector<unsigned int>{tensor->GetIteration() - 1},
|
||||
std::vector<unsigned int>{tensor->GetRootGraphId()}, &result_list_prev);
|
||||
tensor_prev = result_list_prev[0];
|
||||
if (!tensor_prev->GetByteSize()) {
|
||||
tensor_prev.reset();
|
||||
} else {
|
||||
previous_tensor_ptr = tensor_prev->GetDataPtr();
|
||||
}
|
||||
}
|
||||
return previous_tensor_ptr;
|
||||
}
|
||||
#endif
|
||||
|
||||
void DebugServices::AddWatchPointsToCheck(bool init_dbg_suspend, bool step_end, bool recheck,
|
||||
const std::string &tensor_name, const std::string &tensor_name_no_slot,
|
||||
bool *previous_iter_tensor_needed, std::string *qualified_tensor_name,
|
||||
std::vector<watchpoint_t> *watchpoints_to_check) {
|
||||
for (auto w_table_item : watchpoint_table) {
|
||||
auto wp = std::get<1>(w_table_item);
|
||||
// check ONLY init conditions on initial suspended state.
|
||||
// skip other conditions on initial suspended state
|
||||
if (init_dbg_suspend && (wp.condition.type != INIT)) continue;
|
||||
// skip init condition if not init suspend
|
||||
if ((wp.condition.type == INIT) && !init_dbg_suspend) continue;
|
||||
// check change conditions only on step end.
|
||||
if (wp.change_condition() && !step_end) continue;
|
||||
// if recheck, ignore the cache results and reanalyze everything.
|
||||
// if not a recheck, check only unanalyzed tensors
|
||||
if (!recheck && wp_id_cache[tensor_name].count(wp.id)) continue;
|
||||
std::string found = wp.FindQualifiedTensorName(tensor_name_no_slot);
|
||||
if (!found.empty()) {
|
||||
*qualified_tensor_name = found;
|
||||
watchpoints_to_check->push_back(w_table_item.second);
|
||||
#ifdef OFFLINE_DBG_MODE
|
||||
if (wp.change_condition()) {
|
||||
*previous_iter_tensor_needed = true;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void DebugServices::AddAnalyzedTensorToCache(const bool recheck, const unsigned int id,
|
||||
const std::string &tensor_name) {
|
||||
// add analyzed tensor to cache
|
||||
if (!recheck) {
|
||||
wp_id_cache[tensor_name].insert(id);
|
||||
}
|
||||
}
|
||||
|
||||
void DebugServices::CheckWatchpoints(std::vector<std::string> *name, std::vector<std::string> *slot,
|
||||
std::vector<int> *condition, std::vector<unsigned int> *watchpoint_id,
|
||||
std::vector<std::vector<parameter_t>> *parameters,
|
||||
std::vector<int32_t> *error_codes, const std::vector<std::string> &op_overflows,
|
||||
const std::vector<std::shared_ptr<TensorData>> &tensor_list,
|
||||
const bool init_dbg_suspend, const bool step_end, const bool recheck) {
|
||||
std::vector<std::shared_ptr<TensorData>> *tensor_list, const bool init_dbg_suspend,
|
||||
const bool step_end, const bool recheck, std::vector<unsigned int> *device_id,
|
||||
std::vector<unsigned int> *root_graph_id) {
|
||||
std::lock_guard<std::mutex> lg(lock_);
|
||||
if (watchpoint_table.empty()) return;
|
||||
|
||||
for (const auto &tensor : tensor_list) {
|
||||
for (auto &tensor : *tensor_list) {
|
||||
#ifdef OFFLINE_DBG_MODE
|
||||
// read data in offline mode
|
||||
std::vector<std::shared_ptr<TensorData>> result_list;
|
||||
ReadDumpedTensor(std::vector<std::string>{tensor->GetName()}, std::vector<size_t>{tensor->GetSlot()},
|
||||
std::vector<unsigned int>{tensor->GetDeviceId()},
|
||||
std::vector<unsigned int>{tensor->GetIteration()},
|
||||
std::vector<unsigned int>{tensor->GetRootGraphId()}, &result_list);
|
||||
tensor = result_list[0];
|
||||
if (!tensor->GetByteSize()) {
|
||||
tensor.reset();
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
|
||||
const auto tensor_name = tensor->GetName();
|
||||
const auto tensor_name_no_slot = tensor_name.substr(0, tensor_name.find_first_of(':'));
|
||||
const auto tensor_slot = std::to_string(tensor->GetSlot());
|
||||
mindspore::tensor::TensorPtr tensor_ptr = tensor->GetTensor();
|
||||
// no elements to analyze
|
||||
if (tensor_ptr->DataSize() == 0) continue;
|
||||
int tensor_dtype = tensor_ptr->data_type_c();
|
||||
if (tensor->GetByteSize() == 0) continue;
|
||||
int tensor_dtype = tensor->GetType();
|
||||
std::vector<watchpoint_t> watchpoints_to_check;
|
||||
std::string qualified_tensor_name;
|
||||
for (auto w_table_item : watchpoint_table) {
|
||||
auto wp = std::get<1>(w_table_item);
|
||||
// check ONLY init conditions on intial suspended state.
|
||||
// skip other conditions on intial suspended state
|
||||
if (init_dbg_suspend && (wp.condition.type != INIT)) continue;
|
||||
// skip init condition if not init suspend
|
||||
if ((wp.condition.type == INIT) && !init_dbg_suspend) continue;
|
||||
// check change conditions only on step end.
|
||||
if (wp.change_condition() && !step_end) continue;
|
||||
// if recheck, ignore the cache results and reanalyze everything.
|
||||
// if not a recheck, check only unanalyzed tensors
|
||||
if (!recheck && wp_id_cache[tensor_name].count(wp.id)) continue;
|
||||
std::string found = wp.FindQualifiedTensorName(tensor_name_no_slot);
|
||||
if (!found.empty()) {
|
||||
qualified_tensor_name = found;
|
||||
watchpoints_to_check.push_back(w_table_item.second);
|
||||
}
|
||||
}
|
||||
bool previous_iter_tensor_needed = false;
|
||||
// Add do nothing line in case offline debug is off, prevent unused var warning
|
||||
(void)previous_iter_tensor_needed;
|
||||
AddWatchPointsToCheck(init_dbg_suspend, step_end, recheck, tensor_name, tensor_name_no_slot,
|
||||
&previous_iter_tensor_needed, &qualified_tensor_name, &watchpoints_to_check);
|
||||
// no wp set on current tensor
|
||||
if (watchpoints_to_check.empty()) continue;
|
||||
|
||||
uint32_t num_elements = tensor_ptr->DataSize();
|
||||
void *previous_tensor_ptr = tensor_loader_->GetPrevTensor(tensor_name)
|
||||
? tensor_loader_->GetPrevTensor(tensor_name)->GetTensor()->data_c()
|
||||
: nullptr;
|
||||
uint32_t num_elements = tensor->GetNumElements();
|
||||
|
||||
#ifdef OFFLINE_DBG_MODE
|
||||
void *previous_tensor_ptr = GetPrevTensor(tensor, previous_iter_tensor_needed);
|
||||
#else
|
||||
void *previous_tensor_ptr =
|
||||
tensor_loader_->GetPrevTensor(tensor_name) ? tensor_loader_->GetPrevTensor(tensor_name)->GetDataPtr() : nullptr;
|
||||
#endif
|
||||
|
||||
std::unique_ptr<ITensorSummary> base_summary_ptr;
|
||||
if (!(watchpoints_to_check.size() == 1 && watchpoints_to_check[0].condition.type == IS_OVERFLOW)) {
|
||||
switch (tensor_dtype) {
|
||||
case kNumberTypeUInt8: {
|
||||
base_summary_ptr =
|
||||
std::make_unique<TensorSummary<uint8_t>>(tensor_ptr->data_c(), previous_tensor_ptr, num_elements);
|
||||
break;
|
||||
}
|
||||
case kNumberTypeInt8: {
|
||||
base_summary_ptr =
|
||||
std::make_unique<TensorSummary<int8_t>>(tensor_ptr->data_c(), previous_tensor_ptr, num_elements);
|
||||
break;
|
||||
}
|
||||
case kNumberTypeUInt16: {
|
||||
base_summary_ptr =
|
||||
std::make_unique<TensorSummary<uint16_t>>(tensor_ptr->data_c(), previous_tensor_ptr, num_elements);
|
||||
break;
|
||||
}
|
||||
case kNumberTypeInt16: {
|
||||
base_summary_ptr =
|
||||
std::make_unique<TensorSummary<int16_t>>(tensor_ptr->data_c(), previous_tensor_ptr, num_elements);
|
||||
break;
|
||||
}
|
||||
case kNumberTypeUInt32: {
|
||||
base_summary_ptr =
|
||||
std::make_unique<TensorSummary<uint32_t>>(tensor_ptr->data_c(), previous_tensor_ptr, num_elements);
|
||||
break;
|
||||
}
|
||||
case kNumberTypeInt32:
|
||||
case kNumberTypeInt: {
|
||||
base_summary_ptr =
|
||||
std::make_unique<TensorSummary<int32_t>>(tensor_ptr->data_c(), previous_tensor_ptr, num_elements);
|
||||
break;
|
||||
}
|
||||
case kNumberTypeUInt64: {
|
||||
base_summary_ptr =
|
||||
std::make_unique<TensorSummary<uint64_t>>(tensor_ptr->data_c(), previous_tensor_ptr, num_elements);
|
||||
break;
|
||||
}
|
||||
case kNumberTypeInt64: {
|
||||
base_summary_ptr =
|
||||
std::make_unique<TensorSummary<int64_t>>(tensor_ptr->data_c(), previous_tensor_ptr, num_elements);
|
||||
break;
|
||||
}
|
||||
case kNumberTypeFloat16: {
|
||||
base_summary_ptr =
|
||||
std::make_unique<TensorSummary<float16>>(tensor_ptr->data_c(), previous_tensor_ptr, num_elements);
|
||||
break;
|
||||
}
|
||||
case kNumberTypeFloat32:
|
||||
case kNumberTypeFloat: {
|
||||
base_summary_ptr =
|
||||
std::make_unique<TensorSummary<float>>(tensor_ptr->data_c(), previous_tensor_ptr, num_elements);
|
||||
break;
|
||||
}
|
||||
case kNumberTypeFloat64: {
|
||||
base_summary_ptr =
|
||||
std::make_unique<TensorSummary<double>>(tensor_ptr->data_c(), previous_tensor_ptr, num_elements);
|
||||
break;
|
||||
}
|
||||
case kNumberTypeBool: {
|
||||
base_summary_ptr =
|
||||
std::make_unique<TensorSummary<bool>>(tensor_ptr->data_c(), previous_tensor_ptr, num_elements);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
MS_LOG(INFO) << "Unsupported tensor type";
|
||||
continue;
|
||||
base_summary_ptr = GetSummaryPtr(tensor, previous_tensor_ptr, num_elements, tensor_dtype);
|
||||
if (base_summary_ptr != nullptr) {
|
||||
base_summary_ptr->SummarizeTensor(watchpoints_to_check);
|
||||
}
|
||||
base_summary_ptr->SummarizeTensor(watchpoints_to_check);
|
||||
}
|
||||
|
||||
for (auto &wp : watchpoints_to_check) {
|
||||
bool is_hit = false;
|
||||
int error_code = 0;
|
||||
|
@ -189,26 +250,439 @@ void DebugServices::CheckWatchpoints(std::vector<std::string> *name, std::vector
|
|||
error_code = std::get<1>(item);
|
||||
parameter_list = std::get<2>(item);
|
||||
}
|
||||
// add analyzed tensor to cache
|
||||
if (!recheck) {
|
||||
wp_id_cache[tensor_name].insert(wp.id);
|
||||
}
|
||||
AddAnalyzedTensorToCache(recheck, wp.id, tensor_name);
|
||||
|
||||
if (is_hit || error_code) {
|
||||
name->push_back(qualified_tensor_name);
|
||||
slot->push_back(tensor_slot);
|
||||
condition->push_back(wp.condition.type);
|
||||
watchpoint_id->push_back(wp.id);
|
||||
if (device_id != nullptr) {
|
||||
device_id->push_back(tensor->GetDeviceId());
|
||||
}
|
||||
if (root_graph_id != nullptr) {
|
||||
root_graph_id->push_back(tensor->GetRootGraphId());
|
||||
}
|
||||
parameters->push_back(parameter_list);
|
||||
error_codes->push_back(error_code);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef OFFLINE_DBG_MODE
|
||||
// in offline mode remove the need for the data
|
||||
tensor.reset();
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef OFFLINE_DBG_MODE
|
||||
void DebugServices::GetSlotInfo(const std::string &file_name, const std::string &dump_name,
|
||||
const std::string &specific_dump_dir, std::vector<size_t> *slot_list) {
|
||||
if (is_sync_mode) {
|
||||
// get the slot from the name
|
||||
std::string delimiter = "_";
|
||||
unsigned int start_pos = dump_name.length();
|
||||
unsigned int end_pos = file_name.find(delimiter, start_pos);
|
||||
std::string item = file_name.substr(start_pos, end_pos - start_pos);
|
||||
slot_list->push_back(std::stoul(item));
|
||||
} else {
|
||||
std::string out_dir = "/tmp/" + file_name;
|
||||
std::string input_file = specific_dump_dir + "/" + file_name;
|
||||
std::string log_enabled = DbgLogger::verbose ? "" : "> /dev/null";
|
||||
std::string convert_command =
|
||||
"python /usr/local/Ascend/toolkit/tools/operator_cmp/compare/msaccucmp.pyc convert -d " + input_file + " -out " +
|
||||
out_dir + " -t bin " + log_enabled;
|
||||
(void)(system(convert_command.c_str()) + 1);
|
||||
convert_command = "python /usr/local/Ascend/toolkit/tools/operator_cmp/compare/msaccucmp.pyc convert -d " +
|
||||
input_file + " -out " + out_dir + " -f NCHW -t bin " + log_enabled;
|
||||
(void)(system(convert_command.c_str()) + 1);
|
||||
|
||||
std::string prefix_converted_dump_file_name = file_name + ".output.";
|
||||
DIR *convert_dir_ptr = opendir(out_dir.c_str());
|
||||
if (convert_dir_ptr != nullptr) {
|
||||
struct dirent *convert_dir_contents = nullptr;
|
||||
while ((convert_dir_contents = readdir(convert_dir_ptr)) != NULL) {
|
||||
if (convert_dir_contents->d_type == DT_REG) {
|
||||
std::string converted_file_name = convert_dir_contents->d_name;
|
||||
std::size_t nd_file = converted_file_name.rfind(".ND.bin");
|
||||
std::size_t fractal_z_file = converted_file_name.rfind(".FRACTAL_Z.bin");
|
||||
std::size_t nchw_file = converted_file_name.rfind(".NCHW.bin");
|
||||
if (nd_file == std::string::npos && nchw_file == std::string::npos && fractal_z_file == std::string::npos) {
|
||||
continue;
|
||||
}
|
||||
std::size_t found_c = converted_file_name.find(prefix_converted_dump_file_name);
|
||||
if (found_c != 0) {
|
||||
continue;
|
||||
}
|
||||
std::size_t slot_start_pos = prefix_converted_dump_file_name.length();
|
||||
std::size_t slot_end_pos = converted_file_name.find(".", slot_start_pos) - 1;
|
||||
std::string slot_item = converted_file_name.substr(slot_start_pos, slot_end_pos - slot_start_pos + 1);
|
||||
slot_list->push_back(std::stoul(slot_item));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
MS_LOG(INFO) << out_dir << " directory does not exist!";
|
||||
}
|
||||
closedir(convert_dir_ptr);
|
||||
|
||||
// std::string delete_cmd = "rm -rf " + out_dir;
|
||||
// system(delete_cmd.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
std::size_t DebugServices::GetShapeTypeInfo(const std::string &specific_dump_dir, std::size_t slot,
|
||||
const std::string &prefix_dump_file_name, std::string *file_name,
|
||||
std::string *type_name, std::string *out_dir, std::vector<int64_t> *shape) {
|
||||
std::size_t found = 0;
|
||||
if (is_sync_mode) {
|
||||
found = file_name->rfind(prefix_dump_file_name, 0);
|
||||
} else {
|
||||
std::string file_name_w_o_prefix = file_name->substr(file_name->find('.') + 1);
|
||||
found = file_name_w_o_prefix.rfind(prefix_dump_file_name, 0);
|
||||
}
|
||||
if (found != 0) {
|
||||
return found;
|
||||
}
|
||||
if (is_sync_mode) {
|
||||
// found a file, now get the shape and type
|
||||
// find "_shape_" in the filename
|
||||
std::string shape_delimiter = "_shape_";
|
||||
unsigned int str_pos = file_name->find(shape_delimiter) + shape_delimiter.length();
|
||||
|
||||
// read numbers with '_' delimter until you read a non-number, that will be the type name
|
||||
bool number_found = true;
|
||||
std::string delimiter = "_";
|
||||
while (number_found) {
|
||||
unsigned int end_pos = file_name->find(delimiter, str_pos);
|
||||
std::string item = file_name->substr(str_pos, end_pos - str_pos);
|
||||
bool is_number = !item.empty() && std::find_if(item.begin(), item.end(),
|
||||
[](unsigned char c) { return !std::isdigit(c); }) == item.end();
|
||||
|
||||
if (is_number) {
|
||||
shape->push_back(std::stoul(item));
|
||||
str_pos = end_pos + 1;
|
||||
} else {
|
||||
*type_name = item;
|
||||
number_found = false;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
*out_dir = "/tmp/" + *file_name;
|
||||
std::string input_file = specific_dump_dir + "/" + *file_name;
|
||||
std::string log_enabled = DbgLogger::verbose ? "" : "> /dev/null";
|
||||
std::string convert_command =
|
||||
"python /usr/local/Ascend/toolkit/tools/operator_cmp/compare/msaccucmp.pyc convert -d " + input_file + " -out " +
|
||||
*out_dir + " -t bin " + log_enabled;
|
||||
(void)(system(convert_command.c_str()) + 1);
|
||||
convert_command = "python /usr/local/Ascend/toolkit/tools/operator_cmp/compare/msaccucmp.pyc convert -d " +
|
||||
input_file + " -out " + *out_dir + " -f NCHW -t bin " + log_enabled;
|
||||
(void)(system(convert_command.c_str()) + 1);
|
||||
|
||||
std::string prefix_converted_dump_file_name = *file_name + ".output." + std::to_string(slot);
|
||||
*file_name = "";
|
||||
DIR *convert_dir_ptr = opendir(out_dir->c_str());
|
||||
if (convert_dir_ptr != nullptr) {
|
||||
struct dirent *convert_dir_contents = nullptr;
|
||||
while ((convert_dir_contents = readdir(convert_dir_ptr)) != NULL) {
|
||||
if (convert_dir_contents->d_type == DT_REG) {
|
||||
std::string converted_file_name = convert_dir_contents->d_name;
|
||||
std::size_t nd_file = converted_file_name.rfind(".ND.bin");
|
||||
std::size_t fractal_z_file = converted_file_name.rfind(".FRACTAL_Z.bin");
|
||||
std::size_t nchw_file = converted_file_name.rfind(".NCHW.bin");
|
||||
if (nd_file == std::string::npos && nchw_file == std::string::npos && fractal_z_file == std::string::npos) {
|
||||
continue;
|
||||
}
|
||||
std::size_t found_c = converted_file_name.rfind(prefix_converted_dump_file_name, 0);
|
||||
if (found_c != 0) {
|
||||
continue;
|
||||
}
|
||||
*file_name = converted_file_name;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
MS_LOG(INFO) << *out_dir << " directory does not exist!";
|
||||
}
|
||||
closedir(convert_dir_ptr);
|
||||
|
||||
if (*file_name == "") {
|
||||
MS_LOG(WARNING) << out_dir << ": no valid files found post msaccucmp exec";
|
||||
return 1;
|
||||
}
|
||||
|
||||
// std::string delete_cmd = "rm -rf " + out_dir;
|
||||
// system(delete_cmd.c_str());
|
||||
|
||||
// found a file, now get the shape and type
|
||||
std::stringstream check_filename(*file_name);
|
||||
std::vector<std::string> tokens;
|
||||
std::string intermediate;
|
||||
|
||||
while (getline(check_filename, intermediate, '.')) {
|
||||
tokens.push_back(intermediate);
|
||||
}
|
||||
*type_name = tokens[8];
|
||||
|
||||
std::string shape_str = tokens[7];
|
||||
std::stringstream check_shape(shape_str);
|
||||
while (getline(check_shape, intermediate, '_')) {
|
||||
shape->push_back(std::stoul(intermediate));
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void DebugServices::ReadDumpedTensor(std::vector<std::string> backend_name, std::vector<size_t> slot,
|
||||
std::vector<unsigned int> device_id, std::vector<unsigned int> iteration,
|
||||
std::vector<unsigned int> root_graph_id,
|
||||
std::vector<std::shared_ptr<TensorData>> *result_list) {
|
||||
for (unsigned int i = 0; i < backend_name.size(); i++) {
|
||||
// form prefix of the tensor file to read from graph pb node name
|
||||
std::string dump_style_kernel_name = backend_name[i];
|
||||
const std::string strsrc = "/";
|
||||
|
||||
std::string strdst;
|
||||
if (is_sync_mode) {
|
||||
strdst = "--";
|
||||
} else {
|
||||
strdst = "_";
|
||||
}
|
||||
|
||||
std::string::size_type pos = 0;
|
||||
std::string::size_type srclen = strsrc.size();
|
||||
std::string::size_type dstlen = strdst.size();
|
||||
|
||||
// remove slot from name
|
||||
std::size_t found_colon = dump_style_kernel_name.find_last_of(":");
|
||||
dump_style_kernel_name = dump_style_kernel_name.substr(0, found_colon);
|
||||
|
||||
while ((pos = dump_style_kernel_name.find(strsrc, pos)) != std::string::npos) {
|
||||
dump_style_kernel_name.replace(pos, srclen, strdst);
|
||||
pos += dstlen;
|
||||
}
|
||||
|
||||
std::string prefix_dump_file_name = dump_style_kernel_name;
|
||||
if (is_sync_mode) {
|
||||
prefix_dump_file_name += "_output_" + std::to_string(slot[i]) + "_";
|
||||
}
|
||||
|
||||
std::string specific_dump_dir;
|
||||
if (is_sync_mode) {
|
||||
specific_dump_dir =
|
||||
dump_dir + "/device_" + std::to_string(device_id[i]) + "/iteration_" + std::to_string(iteration[i]);
|
||||
} else {
|
||||
specific_dump_dir = dump_dir + "/device_" + std::to_string(device_id[i]) + "/" + net_name + "_graph_" +
|
||||
std::to_string(root_graph_id[i]) + "/" + std::to_string(root_graph_id[i]) + "/" +
|
||||
std::to_string(iteration[i]);
|
||||
}
|
||||
|
||||
// search files in dir for the one that meets the filename prefix and read the file into memory
|
||||
DIR *d;
|
||||
d = opendir(specific_dump_dir.c_str());
|
||||
std::vector<char> *buffer = NULL;
|
||||
std::string type_name = "";
|
||||
std::vector<int64_t> shape;
|
||||
uint64_t data_size = 0;
|
||||
if (d != nullptr) {
|
||||
struct dirent *dir = nullptr;
|
||||
while ((dir = readdir(d)) != NULL) {
|
||||
if (dir->d_type == DT_REG) {
|
||||
std::string file_name = dir->d_name;
|
||||
std::string out_dir;
|
||||
std::size_t found = GetShapeTypeInfo(specific_dump_dir, slot[i], prefix_dump_file_name, &file_name,
|
||||
&type_name, &out_dir, &shape);
|
||||
if (found != 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// read the tensor data from the file
|
||||
std::string file_path;
|
||||
if (is_sync_mode) {
|
||||
file_path = specific_dump_dir + "/" + file_name;
|
||||
} else {
|
||||
file_path = out_dir + "/" + file_name;
|
||||
}
|
||||
|
||||
std::ifstream infile;
|
||||
infile.open(file_path.c_str(), std::ios::binary | std::ios::ate);
|
||||
if (!infile.is_open()) {
|
||||
MS_LOG(ERROR) << "Failed to open bin file " << file_name;
|
||||
break;
|
||||
}
|
||||
uint64_t file_size = infile.tellg();
|
||||
infile.seekg(0, std::ios::beg);
|
||||
buffer = new std::vector<char>(file_size);
|
||||
if (!infile.read(buffer->data(), file_size)) {
|
||||
MS_LOG(ERROR) << "Failed to read in bin file " << file_name;
|
||||
break;
|
||||
}
|
||||
data_size = file_size;
|
||||
infile.close();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
MS_LOG(INFO) << "directory does not exist!";
|
||||
}
|
||||
closedir(d);
|
||||
|
||||
// call LoadNewTensor to store tensor in internal cache
|
||||
auto tensor_data = std::make_shared<TensorData>();
|
||||
tensor_data->SetName(backend_name[i]);
|
||||
tensor_data->SetExecutionOrder(0);
|
||||
tensor_data->SetSlot(slot[i]);
|
||||
tensor_data->SetIteration(iteration[i]);
|
||||
tensor_data->SetDeviceId(device_id[i]);
|
||||
tensor_data->SetRootGraphId(root_graph_id[i]);
|
||||
if (data_size) {
|
||||
tensor_data->SetDataPtr(buffer->data());
|
||||
} else {
|
||||
tensor_data->SetDataPtr(NULL);
|
||||
}
|
||||
tensor_data->SetByteSize(data_size);
|
||||
tensor_data->SetType(type_name);
|
||||
tensor_data->SetShape(shape);
|
||||
if (data_size) {
|
||||
tensor_loader_->LoadNewTensor(tensor_data, false);
|
||||
}
|
||||
|
||||
// add to result_list
|
||||
result_list->push_back(tensor_data);
|
||||
}
|
||||
}
|
||||
|
||||
void ReplaceSrcFileName(const bool is_sync_mode, std::string *dump_style_name) {
|
||||
const std::string strsrc = "/";
|
||||
std::string strdst;
|
||||
if (is_sync_mode) {
|
||||
strdst = "--";
|
||||
} else {
|
||||
strdst = "_";
|
||||
}
|
||||
std::string::size_type pos = 0;
|
||||
std::string::size_type srclen = strsrc.size();
|
||||
std::string::size_type dstlen = strdst.size();
|
||||
|
||||
while ((pos = dump_style_name->find(strsrc, pos)) != std::string::npos) {
|
||||
dump_style_name->replace(pos, srclen, strdst);
|
||||
pos += dstlen;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::shared_ptr<TensorData>> DebugServices::ReadNeededDumpedTensors(unsigned int iteration) {
|
||||
// get a list of nodes and the devices they are on to monitor
|
||||
std::vector<std::shared_ptr<TensorData>> tensor_list;
|
||||
std::map<std::tuple<uint32_t, uint32_t>, std::unordered_set<std::string>> device_and_graph_to_nodes;
|
||||
for (auto w_table_item : watchpoint_table) {
|
||||
auto wp = std::get<1>(w_table_item);
|
||||
for (auto check_node : wp.check_node_list) {
|
||||
unsigned int index = 0;
|
||||
std::string w_name = std::get<0>(check_node);
|
||||
bool w_is_param = std::get<1>(check_node);
|
||||
|
||||
std::string node_name = w_name;
|
||||
if (w_is_param) {
|
||||
std::size_t found = node_name.find_last_of("/");
|
||||
node_name = node_name.substr(found + 1);
|
||||
}
|
||||
|
||||
std::vector<uint32_t> devices = std::get<1>(wp.check_node_device_list[index]);
|
||||
std::vector<uint32_t> graphs = std::get<1>(wp.check_node_graph_list[index]);
|
||||
for (auto device : devices) {
|
||||
for (auto graph : graphs) {
|
||||
std::tuple<uint32_t, uint32_t> key(device, graph);
|
||||
device_and_graph_to_nodes[key].insert(node_name);
|
||||
}
|
||||
}
|
||||
|
||||
index++;
|
||||
}
|
||||
}
|
||||
|
||||
// scan each device/iteration dir for the watched nodes for each device, and add to tensor_list
|
||||
// as they are found
|
||||
for (auto const &device_and_graph_item : device_and_graph_to_nodes) {
|
||||
std::tuple<uint32_t, uint32_t> device_and_graph = device_and_graph_item.first;
|
||||
uint32_t device_id = std::get<0>(device_and_graph);
|
||||
uint32_t root_graph_id = std::get<1>(device_and_graph);
|
||||
std::unordered_set<std::string> wp_nodes = device_and_graph_item.second;
|
||||
std::vector<std::tuple<std::string, std::string>> proto_to_dump;
|
||||
|
||||
std::string specific_dump_dir;
|
||||
if (is_sync_mode) {
|
||||
specific_dump_dir = dump_dir + "/device_" + std::to_string(device_id) + "/iteration_" + std::to_string(iteration);
|
||||
} else {
|
||||
specific_dump_dir = dump_dir + "/device_" + std::to_string(device_id) + "/" + net_name + "_graph_" +
|
||||
std::to_string(root_graph_id) + "/" + std::to_string(root_graph_id) + "/" +
|
||||
std::to_string(iteration);
|
||||
}
|
||||
|
||||
// convert node names to dump style
|
||||
for (auto node : wp_nodes) {
|
||||
std::string orig_name = node;
|
||||
std::string dump_style_name = node;
|
||||
ReplaceSrcFileName(is_sync_mode, &dump_style_name);
|
||||
|
||||
if (is_sync_mode) {
|
||||
dump_style_name.append("_output_");
|
||||
}
|
||||
|
||||
proto_to_dump.push_back(std::tuple<std::string, std::string>(orig_name, dump_style_name));
|
||||
}
|
||||
|
||||
// search files in dir for the one that meets the filename prefix and read the file into memory
|
||||
DIR *d;
|
||||
d = opendir(specific_dump_dir.c_str());
|
||||
if (d != nullptr) {
|
||||
struct dirent *dir = nullptr;
|
||||
while ((dir = readdir(d)) != NULL) {
|
||||
if (dir->d_type == DT_REG) {
|
||||
std::string file_name = dir->d_name;
|
||||
for (auto &node : proto_to_dump) {
|
||||
std::string dump_name = std::get<1>(node);
|
||||
std::size_t found = 0;
|
||||
|
||||
if (is_sync_mode) {
|
||||
found = file_name.rfind(dump_name, 0);
|
||||
} else {
|
||||
std::string file_name_w_o_prefix = file_name.substr(file_name.find('.') + 1);
|
||||
found = file_name_w_o_prefix.rfind(dump_name, 0);
|
||||
}
|
||||
|
||||
if (found == 0) {
|
||||
std::vector<size_t> slot_list;
|
||||
GetSlotInfo(file_name, dump_name, specific_dump_dir, &slot_list);
|
||||
for (auto slot : slot_list) {
|
||||
// add a TensorData entry (data will be read when needed)
|
||||
std::vector<int64_t> shape;
|
||||
std::string orig_name = std::get<0>(node);
|
||||
auto tensor_data = std::make_shared<TensorData>();
|
||||
tensor_data->SetName(orig_name);
|
||||
tensor_data->SetExecutionOrder(0);
|
||||
tensor_data->SetSlot(slot);
|
||||
tensor_data->SetIteration(iteration);
|
||||
tensor_data->SetDeviceId(device_id);
|
||||
tensor_data->SetRootGraphId(root_graph_id);
|
||||
tensor_data->SetDataPtr(NULL);
|
||||
tensor_data->SetByteSize(0);
|
||||
tensor_data->SetType("");
|
||||
tensor_data->SetShape(shape);
|
||||
|
||||
tensor_list.push_back(tensor_data);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return tensor_list;
|
||||
}
|
||||
#endif
|
||||
|
||||
void DebugServices::ReadNodesTensors(std::vector<std::string> name, std::vector<std::string> *ret_name,
|
||||
std::vector<char *> *data_ptr, std::vector<ssize_t> *data_size,
|
||||
std::vector<TypePtr> *dtype, std::vector<std::vector<int64_t>> *shape) {
|
||||
std::vector<unsigned int> *dtype, std::vector<std::vector<int64_t>> *shape) {
|
||||
std::vector<std::tuple<std::string, std::shared_ptr<TensorData>>> result_list;
|
||||
tensor_loader_->SearchTensors(name, &result_list);
|
||||
|
||||
|
@ -217,13 +691,14 @@ void DebugServices::ReadNodesTensors(std::vector<std::string> name, std::vector<
|
|||
continue;
|
||||
}
|
||||
ret_name->push_back(std::get<0>(result));
|
||||
data_ptr->push_back(reinterpret_cast<char *>(std::get<1>(result)->GetTensor()->data_c()));
|
||||
data_size->push_back(std::get<1>(result)->GetTensor()->data().nbytes());
|
||||
dtype->push_back(std::get<1>(result)->GetTensor()->Dtype());
|
||||
shape->push_back(std::get<1>(result)->GetTensor()->shape());
|
||||
data_ptr->push_back(reinterpret_cast<char *>(std::get<1>(result)->GetDataPtr()));
|
||||
data_size->push_back(std::get<1>(result)->GetByteSize());
|
||||
dtype->push_back(std::get<1>(result)->GetType());
|
||||
shape->push_back(std::get<1>(result)->GetShape());
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef ONLINE_DBG_MODE
|
||||
bool DebugServices::IsWatchPoint(const std::string &kernel_name, const CNodePtr &kernel) const {
|
||||
bool ret = false;
|
||||
for (auto w_table_item : watchpoint_table) {
|
||||
|
@ -256,6 +731,7 @@ bool DebugServices::IsWatchPointNodeInput(const std::string &w_name, const CNode
|
|||
return false;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void DebugServices::EmptyTensor() { tensor_loader_->EmptyTensor(); }
|
||||
|
||||
|
@ -273,6 +749,7 @@ void DebugServices::EmptyPrevTensor() { tensor_loader_->EmptyPrevTensor(); }
|
|||
|
||||
void DebugServices::EmptyCurrentTensor() { tensor_loader_->EmptyCurrentTensor(); }
|
||||
|
||||
#ifdef ONLINE_DBG_MODE
|
||||
bool DebugServices::DumpTensorToFile(const std::string &tensor_name, bool trans_flag, const std::string &filepath,
|
||||
const std::string &host_fmt, const std::vector<int64_t> &host_shape,
|
||||
TypeId host_type, TypeId addr_type_id, const std::string &addr_format,
|
||||
|
@ -280,6 +757,7 @@ bool DebugServices::DumpTensorToFile(const std::string &tensor_name, bool trans_
|
|||
return tensor_loader_->DumpTensorToFile(tensor_name, trans_flag, filepath, host_fmt, host_shape, host_type,
|
||||
addr_type_id, addr_format, slot);
|
||||
}
|
||||
#endif
|
||||
|
||||
bool DebugServices::LoadNewTensor(const std::shared_ptr<TensorData> &tensor, bool keep_prev) {
|
||||
return tensor_loader_->LoadNewTensor(tensor, keep_prev);
|
||||
|
@ -298,6 +776,7 @@ void DebugServices::ResetLoadedTensors() {
|
|||
tensor_loader_->SwapCurrentPrev();
|
||||
}
|
||||
|
||||
#ifdef ONLINE_DBG_MODE
|
||||
std::vector<std::shared_ptr<TensorData>> DebugServices::GetNodeTensor(const CNodePtr &kernel) {
|
||||
MS_EXCEPTION_IF_NULL(kernel);
|
||||
std::vector<std::shared_ptr<TensorData>> result;
|
||||
|
@ -310,6 +789,8 @@ std::vector<std::shared_ptr<TensorData>> DebugServices::GetNodeTensor(const CNod
|
|||
}
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
|
||||
bool DebugServices::TensorExistsInCurrent(std::string tensor_name) {
|
||||
return tensor_loader_->TensorExistsInCurrent(tensor_name);
|
||||
}
|
||||
|
@ -317,4 +798,18 @@ void DebugServices::MoveTensorCurrentToPrev(std::string tensor_name) {
|
|||
tensor_loader_->MoveTensorCurrentToPrev(tensor_name);
|
||||
}
|
||||
|
||||
void DebugServices::SetNetName(std::string net_name) { this->net_name = net_name; }
|
||||
|
||||
std::string DebugServices::GetNetName() { return net_name; }
|
||||
|
||||
void DebugServices::SetDumpDir(std::string dump_dir) { this->dump_dir = dump_dir; }
|
||||
|
||||
std::string DebugServices::GetDumpDir() { return dump_dir; }
|
||||
|
||||
void DebugServices::SetSyncMode(bool is_sync_mode) { this->is_sync_mode = is_sync_mode; }
|
||||
|
||||
bool DebugServices::GetSyncMode() { return is_sync_mode; }
|
||||
|
||||
#ifdef ONLINE_DBG_MODE
|
||||
} // namespace mindspore
|
||||
#endif
|
||||
|
|
|
@ -16,6 +16,17 @@
|
|||
#ifndef MINDSPORE_CCSRC_DEBUG_DEBUG_SERVICES_H_
|
||||
#define MINDSPORE_CCSRC_DEBUG_DEBUG_SERVICES_H_
|
||||
|
||||
#ifndef OFFLINE_DBG_MODE
|
||||
#define ONLINE_DBG_MODE
|
||||
#endif
|
||||
|
||||
#ifdef OFFLINE_DBG_MODE
|
||||
#include "Eigen/Core"
|
||||
#include "Eigen/src/Core/arch/CUDA/Half.h"
|
||||
using float16 = Eigen::half;
|
||||
#include "debugger/offline_debug/offline_logger.h"
|
||||
#endif
|
||||
|
||||
#include <math.h>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
@ -26,11 +37,13 @@
|
|||
#include <mutex>
|
||||
#include <map>
|
||||
#include <limits>
|
||||
#include <sstream>
|
||||
#include "debug/tensor_load.h"
|
||||
#include "debug/tensor_data.h"
|
||||
#include "ir/dtype.h"
|
||||
|
||||
#ifdef ONLINE_DBG_MODE
|
||||
namespace mindspore {
|
||||
#endif
|
||||
class DebugServices {
|
||||
public:
|
||||
DebugServices();
|
||||
|
@ -103,6 +116,8 @@ class DebugServices {
|
|||
unsigned int id;
|
||||
condition_t condition;
|
||||
std::vector<std::tuple<std::string, bool>> check_node_list;
|
||||
std::vector<std::tuple<std::string, std::vector<uint32_t>>> check_node_device_list;
|
||||
std::vector<std::tuple<std::string, std::vector<uint32_t>>> check_node_graph_list;
|
||||
std::vector<parameter_t> parameter_list;
|
||||
size_t location = 0;
|
||||
|
||||
|
@ -167,30 +182,55 @@ class DebugServices {
|
|||
}
|
||||
} watchpoint_t;
|
||||
|
||||
void AddWatchpoint(unsigned int id, unsigned int watch_condition, float parameter,
|
||||
const std::vector<std::tuple<std::string, bool>> &check_node_list,
|
||||
const std::vector<parameter_t> ¶meter_list);
|
||||
void AddWatchpoint(
|
||||
unsigned int id, unsigned int watch_condition, float parameter,
|
||||
const std::vector<std::tuple<std::string, bool>> &check_node_list, const std::vector<parameter_t> ¶meter_list,
|
||||
const std::vector<std::tuple<std::string, std::vector<uint32_t>>> *check_node_device_list = nullptr,
|
||||
const std::vector<std::tuple<std::string, std::vector<uint32_t>>> *check_node_graph_list = nullptr);
|
||||
|
||||
void RemoveWatchpoint(unsigned int id);
|
||||
|
||||
void CheckWatchpoints(std::vector<std::string> *name, std::vector<std::string> *slot, std::vector<int> *condition,
|
||||
std::vector<unsigned int> *watchpoint_id, std::vector<std::vector<parameter_t>> *parameters,
|
||||
std::vector<int32_t> *error_code, const std::vector<std::string> &op_overflows,
|
||||
const std::vector<std::shared_ptr<TensorData>> &tensor_list, bool init_dbg_suspend,
|
||||
const bool step_end, const bool recheck);
|
||||
std::vector<std::shared_ptr<TensorData>> *tensor_list, bool init_dbg_suspend,
|
||||
const bool step_end, const bool recheck, std::vector<unsigned int> *device_id = nullptr,
|
||||
std::vector<unsigned int> *root_graph_id = nullptr);
|
||||
|
||||
void AddWatchPointsToCheck(bool init_dbg_suspend, bool step_end, bool recheck, const std::string &tensor_name,
|
||||
const std::string &tensor_name_no_slot, bool *previous_iter_tensor_needed,
|
||||
std::string *qualified_tensor_name, std::vector<watchpoint_t> *watchpoints_to_check);
|
||||
|
||||
#ifdef OFFLINE_DBG_MODE
|
||||
void GetSlotInfo(const std::string &file_name, const std::string &dump_name, const std::string &specific_dump_dir,
|
||||
std::vector<size_t> *slot_list);
|
||||
|
||||
std::size_t GetShapeTypeInfo(const std::string &specific_dump_dir, std::size_t slot,
|
||||
const std::string &prefix_dump_file_name, std::string *file_name, std::string *type_name,
|
||||
std::string *out_dir, std::vector<int64_t> *shape);
|
||||
|
||||
void ReadDumpedTensor(std::vector<std::string> backend_name, std::vector<size_t> slot,
|
||||
std::vector<unsigned int> device_id, std::vector<unsigned int> iteration,
|
||||
std::vector<unsigned int> root_graph_id, std::vector<std::shared_ptr<TensorData>> *result_list);
|
||||
|
||||
std::vector<std::shared_ptr<TensorData>> ReadNeededDumpedTensors(unsigned int iteration);
|
||||
|
||||
void *GetPrevTensor(const std::shared_ptr<TensorData> &tensor, bool previous_iter_tensor_needed);
|
||||
#endif
|
||||
void ReadNodesTensors(std::vector<std::string> name, std::vector<std::string> *ret_name,
|
||||
std::vector<char *> *data_ptr, std::vector<ssize_t> *data_size, std::vector<TypePtr> *dtype,
|
||||
std::vector<std::vector<int64_t>> *shape);
|
||||
|
||||
std::vector<char *> *data_ptr, std::vector<ssize_t> *data_size,
|
||||
std::vector<unsigned int> *dtype, std::vector<std::vector<int64_t>> *shape);
|
||||
#ifdef ONLINE_DBG_MODE
|
||||
bool IsWatchPoint(const std::string &kernel_name, const CNodePtr &kernel = nullptr) const;
|
||||
|
||||
bool IsWatchPointNodeInput(const std::string &w_name, const CNodePtr &kernel) const;
|
||||
|
||||
#endif
|
||||
void EmptyTensor();
|
||||
|
||||
std::vector<std::shared_ptr<TensorData>> GetTensor() const;
|
||||
|
||||
void AddAnalyzedTensorToCache(const bool recheck, const unsigned int id, const std::string &tensor_name);
|
||||
|
||||
std::vector<std::shared_ptr<TensorData>> GetNodeTensorMap(const std::string &node_name) const;
|
||||
|
||||
uint32_t GetTensorLoaderIterNum() const;
|
||||
|
@ -201,31 +241,51 @@ class DebugServices {
|
|||
|
||||
void EmptyCurrentTensor();
|
||||
|
||||
#ifdef ONLINE_DBG_MODE
|
||||
bool DumpTensorToFile(const std::string &tensor_name, bool trans_flag, const std::string &filepath,
|
||||
const std::string &host_fmt, const std::vector<int64_t> &host_shape, TypeId host_type,
|
||||
TypeId addr_type_id, const std::string &addr_format, size_t slot) const;
|
||||
#endif
|
||||
|
||||
bool LoadNewTensor(const std::shared_ptr<TensorData> &tensor, bool keep_prev);
|
||||
|
||||
std::unordered_map<unsigned int, watchpoint_t> GetWatchpointTable();
|
||||
|
||||
void ResetLoadedTensors();
|
||||
|
||||
#ifdef ONLINE_DBG_MODE
|
||||
std::vector<std::shared_ptr<TensorData>> GetNodeTensor(const CNodePtr &kernel);
|
||||
#endif
|
||||
|
||||
bool TensorExistsInCurrent(std::string tensor_name);
|
||||
|
||||
void MoveTensorCurrentToPrev(std::string tensor_name);
|
||||
|
||||
void SetNetName(std::string net_name);
|
||||
|
||||
std::string GetNetName();
|
||||
|
||||
void SetDumpDir(std::string dump_dir);
|
||||
|
||||
std::string GetDumpDir();
|
||||
|
||||
void SetSyncMode(bool is_sync_mode);
|
||||
|
||||
bool GetSyncMode();
|
||||
|
||||
private:
|
||||
std::mutex lock_;
|
||||
|
||||
// to keep track of watchpoints that have been checked already for a tensor in current step
|
||||
std::unordered_map<std::string, std::set<int32_t>> wp_id_cache;
|
||||
std::unordered_map<unsigned int, watchpoint_t> watchpoint_table;
|
||||
std::string net_name;
|
||||
std::string dump_dir;
|
||||
bool is_sync_mode;
|
||||
|
||||
TensorLoader *tensor_loader_;
|
||||
};
|
||||
#ifdef ONLINE_DBG_MODE
|
||||
} // namespace mindspore
|
||||
#endif
|
||||
|
||||
#endif // MINDSPORE_CCSRC_DEBUG_DEBUG_SERVICES_H_
|
||||
|
|
|
@ -755,7 +755,7 @@ std::list<TensorProto> Debugger::LoadTensors(const ProtoVector<TensorProto> &ten
|
|||
std::vector<std::string> ret_name;
|
||||
std::vector<char *> data_ptr;
|
||||
std::vector<ssize_t> data_size;
|
||||
std::vector<TypePtr> dtype;
|
||||
std::vector<unsigned int> dtype;
|
||||
std::vector<std::vector<int64_t>> shape;
|
||||
|
||||
std::transform(tensors.begin(), tensors.end(), std::back_inserter(name), GetTensorFullName);
|
||||
|
@ -789,7 +789,7 @@ std::list<TensorProto> Debugger::LoadTensors(const ProtoVector<TensorProto> &ten
|
|||
|
||||
tensor_item.set_tensor_content(data_ptr[result_index] + size_iter, chunk_size);
|
||||
|
||||
tensor_item.set_data_type(GetDebuggerNumberDataType(dtype[result_index]));
|
||||
tensor_item.set_data_type((debugger::DataType)dtype[result_index]);
|
||||
for (auto &elem : shape[result_index]) {
|
||||
tensor_item.add_dims(elem);
|
||||
}
|
||||
|
@ -827,7 +827,7 @@ std::list<WatchpointHit> Debugger::CheckWatchpoints(const std::string &watchnode
|
|||
tensor_list = debug_services_->GetNodeTensor(kernel);
|
||||
}
|
||||
debug_services_->CheckWatchpoints(&name, &slot, &condition, &watchpoint_id, ¶meters, &error_codes, overflow_ops,
|
||||
tensor_list, initial_suspend_, watchnode.empty(), recheck);
|
||||
&tensor_list, initial_suspend_, watchnode.empty(), recheck);
|
||||
std::list<WatchpointHit> hits;
|
||||
for (unsigned int i = 0; i < name.size(); i++) {
|
||||
WatchpointHit hit;
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
-----------------------------------------------------------
|
||||
tensor_info_1 attributes:
|
||||
node name = Default/network-TrainOneStepCell/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op169
|
||||
slot = 0
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 1
|
||||
is_parameter = False
|
||||
|
||||
tensor_data_1 attributes:
|
||||
data (printed in uint8) = [149 167 124 ... 158 212 164]
|
||||
size in bytes = 2076672
|
||||
debugger dtype = 10
|
||||
shape = [32, 192, 13, 13]
|
||||
-----------------------------------------------------------
|
||||
tensor_info_2 attributes:
|
||||
node name = Default/network-TrainOneStepCell/network-WithLossCell/_backbone-AlexNet/ReLUV2-op348
|
||||
slot = 1
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 1
|
||||
is_parameter = False
|
||||
|
||||
tensor_data_2 attributes:
|
||||
data (printed in uint8) = [ 20 21 18 ... 126 98 25]
|
||||
size in bytes = 129792
|
||||
debugger dtype = 6
|
||||
shape = [32, 12, 13, 13, 2]
|
|
@ -0,0 +1,72 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""
|
||||
Read tensor test script for offline debugger APIs.
|
||||
"""
|
||||
|
||||
import mindspore.offline_debug.dbg_services as d
|
||||
import numpy as np
|
||||
|
||||
|
||||
def main():
    """Read two output tensors from an async-mode dump and print them."""
    debugger_backend = d.DbgServices(
        dump_file_path="/opt/nvme2n1/j00455527/dumps/async_sink_true/032421")

    _ = debugger_backend.initialize(net_name="alexnet", is_sync_mode=False)

    # (node name, slot) pairs: one zero-slot output and one non-zero-slot output.
    watched = [
        ("Default/network-TrainOneStepCell/network-WithLossCell/_backbone-AlexNet/"
         "conv3-Conv2d/Conv2D-op169", 0),
        ("Default/network-TrainOneStepCell/network-WithLossCell/_backbone-AlexNet/"
         "ReLUV2-op348", 1),
    ]
    tensor_info = [d.TensorInfo(node_name=name, slot=slot, iteration=2,
                                device_id=0, root_graph_id=1, is_parameter=False)
                   for name, slot in watched]

    tensor_data = debugger_backend.read_tensors(tensor_info)

    print_read_tensors(tensor_info, tensor_data)
|
||||
|
||||
|
||||
def print_read_tensors(tensor_info, tensor_data):
    """Pretty-print each requested tensor's info and the data returned for it.

    Entries are paired positionally: tensor_data[i] answers tensor_info[i].
    Also warns when the Python-side buffer length disagrees with the
    C++-reported byte size.
    """
    for pos in range(len(tensor_info)):
        info = tensor_info[pos]
        data = tensor_data[pos]
        print("-----------------------------------------------------------")
        print("tensor_info_" + str(pos + 1) + " attributes:")
        print("node name = ", info.node_name)
        print("slot = ", info.slot)
        print("iteration = ", info.iteration)
        print("device_id = ", info.device_id)
        print("root_graph_id = ", info.root_graph_id)
        print("is_parameter = ", info.is_parameter)
        print()
        print("tensor_data_" + str(pos + 1) + " attributes:")
        print("data (printed in uint8) = ", np.frombuffer(
            data.data_ptr, np.uint8, data.data_size))
        py_byte_size = len(data.data_ptr)
        c_byte_size = data.data_size
        if c_byte_size != py_byte_size:
            print("The python byte size of ", py_byte_size,
                  " does not match the C++ byte size of ", c_byte_size)
        print("size in bytes = ", data.data_size)
        print("debugger dtype = ", data.dtype)
        print("shape = ", data.shape)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -0,0 +1,14 @@
|
|||
-----------------------------------------------------------
|
||||
watchpoint_hit for test_1 attributes:
|
||||
name = Default/network-TrainOneStepCell/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op169
|
||||
slot = 0
|
||||
condition = 6
|
||||
watchpoint_id = 1
|
||||
parameter 0 name = param
|
||||
parameter 0 disabled = False
|
||||
parameter 0 value = 0.0
|
||||
parameter 0 hit = True
|
||||
parameter 0 actual_value = -0.1417236328125
|
||||
error code = 0
|
||||
device_id = 0
|
||||
root_graph_id = 1
|
|
@ -0,0 +1,92 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""
|
||||
Watchpoints test script for offline debugger APIs.
|
||||
"""
|
||||
|
||||
import mindspore.offline_debug.dbg_services as d
|
||||
|
||||
|
||||
def main():
    """Set, hit, and remove watchpoints against an async-mode dump."""
    debugger_backend = d.DbgServices(
        dump_file_path="/opt/nvme2n1/j00455527/dumps/async_sink_true/032421")

    _ = debugger_backend.initialize(net_name="alexnet", is_sync_mode=False)

    # NOTES:
    # -> watch_condition=6 is MIN_LT
    # -> watch_condition=18 is CHANGE_TOO_LARGE

    # Both watchpoints watch the same conv3 node on device 0, root graph 1.
    watched_nodes = {"Default/network-TrainOneStepCell/network-WithLossCell/"
                     "_backbone-AlexNet/conv3-Conv2d/Conv2D-op169":
                     {"device_id": [0], "root_graph_id": [1], "is_parameter": False
                      }}

    # test 1: watchpoint set and hit (watch_condition=6)
    hit_param = d.Parameter(name="param", disabled=False, value=0.0)
    _ = debugger_backend.add_watchpoint(watchpoint_id=1, watch_condition=6,
                                        check_node_list=watched_nodes,
                                        parameter_list=[hit_param])

    hits_when_set = debugger_backend.check_watchpoints(iteration=2)
    if len(hits_when_set) != 1:
        print("ERROR -> test 1: watchpoint set but not hit just once")
    print_watchpoint_hits(hits_when_set, 1)

    # test 2: watchpoint remove and ensure it's not hit
    _ = debugger_backend.remove_watchpoint(watchpoint_id=1)
    hits_after_removal = debugger_backend.check_watchpoints(iteration=2)
    if hits_after_removal:
        print("ERROR -> test 2: watchpoint removed but hit")

    # test 3: watchpoint set and not hit, then remove
    miss_param = d.Parameter(name="param", disabled=False, value=-1000.0)
    _ = debugger_backend.add_watchpoint(watchpoint_id=2, watch_condition=6,
                                        check_node_list=watched_nodes,
                                        parameter_list=[miss_param])

    hits_below_threshold = debugger_backend.check_watchpoints(iteration=2)
    if hits_below_threshold:
        print("ERROR -> test 3: watchpoint set but not supposed to be hit")
    _ = debugger_backend.remove_watchpoint(watchpoint_id=2)
|
||||
|
||||
|
||||
def print_watchpoint_hits(watchpoint_hits, test_id):
    """Pretty-print every watchpoint hit, including each hit's parameter list."""
    for pos in range(len(watchpoint_hits)):
        hit = watchpoint_hits[pos]
        print("-----------------------------------------------------------")
        print("watchpoint_hit for test_%u attributes:" % test_id)
        print("name = ", hit.name)
        print("slot = ", hit.slot)
        print("condition = ", hit.condition)
        print("watchpoint_id = ", hit.watchpoint_id)
        for p, param in enumerate(hit.parameters):
            print("parameter ", p, " name = ", param.name)
            print("parameter ", p, " disabled = ", param.disabled)
            print("parameter ", p, " value = ", param.value)
            print("parameter ", p, " hit = ", param.hit)
            print("parameter ", p, " actual_value = ", param.actual_value)
        print("error code = ", hit.error_code)
        print("device_id = ", hit.device_id)
        print("root_graph_id = ", hit.root_graph_id)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -0,0 +1,49 @@
|
|||
# Run one offline-debugger test script: execute it, strip warning/deprecation
# noise from its output, and diff against the checked-in expected file.
run_test() {
  local name="$1"
  python "${name}.py" > "${name}.actual"
  # Remove nondeterministic log lines before comparing.
  sed -i '/\[WARNING\]/d' "${name}.actual"
  sed -i '/Deprecated/d' "${name}.actual"
  if diff "${name}.actual" "${name}.expected"; then
    echo "${name} PASSED"
  else
    echo "${name} FAILED"
  fi
}

# The five stanzas below were copy-pasted; drive them from one list instead.
for test_name in \
    sync_trans_false_read_tensors \
    sync_trans_true_read_tensors \
    sync_trans_false_watchpoints \
    async_sink_mode_true_read_tensors \
    async_sink_mode_true_watchpoints; do
  run_test "${test_name}"
done
|
|
@ -0,0 +1,70 @@
|
|||
-----------------------------------------------------------
|
||||
tensor_info_1 attributes:
|
||||
node name = Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias
|
||||
slot = 0
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 0
|
||||
is_parameter = True
|
||||
|
||||
tensor_data_1 attributes:
|
||||
data (printed in uint8) = [170 19 44 181 254 212 16 52 52 162 148 180 130 115 226 180 183 243
|
||||
101 52 224 79 189 51 10 70 69 51 199 75 159 52 79 98 104 52
|
||||
106 77 19 52 129 183 8 180 252 58 48 180 35 219 9 52 240 201
|
||||
179 51 142 151 158 51 210 145 182 53 140 219 0 53 140 219 22 181
|
||||
46 33 87 180 238 90 122 180 166 10 38 179 202 195 4 53 166 10
|
||||
150 51 214 120 209 52 235 115 37 180 92 177 215 180 0 136 84 51
|
||||
72 114 145 180 43 169 255 180 114 27 61 52 76 225 122 50 126 72
|
||||
159 51 58 35 202 51 114 61 106 51 60 223 63 52 209 179 1 52
|
||||
232 217 44 178 130 158 109 179 213 231 10 179 37 40 94 179 208 68
|
||||
64 53 6 52 249 52 162 35 1 181 231 29 155 52 30 201 69 180
|
||||
229 131 126 51 18 165 109 180 164 112 163 181 116 172 11 178 6 129
|
||||
37 52 54 205 203 180 115 104 145 52 232 106 219 179 36 40 214 52
|
||||
202 50 204 52 76 89 38 179 230 140 232 178 168 53 77 52 180 191
|
||||
108 51 128 183 64 51 56 137 161 180 247 6 143 180 126 63 197 180
|
||||
198 177 94 52 140 185 139 51 150 178 228 180 255 67 150 52 134 201
|
||||
164 52 107 43 14 53 174 216 63 179 40 160 41 53 120 88 72 179
|
||||
218 172 234 52 234 38 25 52 85 159 155 180 254 67 138 180 34 253
|
||||
118 180 218 61 17 52 242 133 253 52 175 37 180 52 171 62 163 52
|
||||
202 195 86 53 160 171 45 52 34 31 176 180 156 85 5 53 178 191
|
||||
68 180 42 203 140 52 248 117 72 52 248 253 212 176 195 100 202 51
|
||||
87 14 141 52 91 100 235 51 48 221 136 52 143 117 17 180 51 196
|
||||
25 52 127 29 112 180 152 144 207 178 219 104 64 52 21 174 251 52
|
||||
164 78 138 181 20 63 6 52 10 249 96 179 163 146 18 53 200 186
|
||||
236 52 2 188 85 52 124 140 121 179 246 185 22 181 246 74 249 51
|
||||
70 182 135 53 189 227 76 52 249 160 159 180 134 235 65 53 64 164
|
||||
255 51 224 156 41 53 142 117 69 181 247 151 101 53 185 175 35 52
|
||||
164 112 21 53 30 31 212 179 142 151 110 179 176 148 29 181 206 204
|
||||
88 53 116 215 214 180 172 173 216 51 106 222 153 180 200 152 19 181
|
||||
176 3 7 52 215 52 87 52]
|
||||
size in bytes = 512
|
||||
debugger dtype = 11
|
||||
shape = [128]
|
||||
-----------------------------------------------------------
|
||||
tensor_info_2 attributes:
|
||||
node name = Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op168
|
||||
slot = 0
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 0
|
||||
is_parameter = False
|
||||
|
||||
tensor_data_2 attributes:
|
||||
data (printed in uint8) = [181 167 46 ... 12 204 164]
|
||||
size in bytes = 2076672
|
||||
debugger dtype = 10
|
||||
shape = [32, 12, 13, 13, 16]
|
||||
-----------------------------------------------------------
|
||||
tensor_info_3 attributes:
|
||||
node name = Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op346
|
||||
slot = 1
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 0
|
||||
is_parameter = False
|
||||
|
||||
tensor_data_3 attributes:
|
||||
data (printed in uint8) = [ 50 17 122 ... 94 42 90]
|
||||
size in bytes = 129792
|
||||
debugger dtype = 6
|
||||
shape = [32, 12, 13, 13, 2]
|
|
@ -0,0 +1,74 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""
|
||||
Read tensor test script for offline debugger APIs.
|
||||
"""
|
||||
|
||||
import mindspore.offline_debug.dbg_services as d
|
||||
import numpy as np
|
||||
|
||||
|
||||
def main():
    """Drive the offline debugger read-tensor flow against a dumped AlexNet run."""
    # Point the offline debugger at a pre-generated (async) dump directory.
    backend = d.DbgServices(
        dump_file_path="/opt/nvme2n1/j00455527/dumps/sync_trans_false/032421/alexnet")

    _ = backend.initialize(
        net_name="Network Name goes here!", is_sync_mode=True)

    # Three representative reads: a parameter, a zero-slot op output, and a
    # non-zero-slot op output.
    tensor_info = [
        # parameter
        d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias",
                     slot=0, iteration=2, device_id=0, root_graph_id=0, is_parameter=True),
        # output tensor with zero slot
        d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op168",
                     slot=0, iteration=2, device_id=0, root_graph_id=0, is_parameter=False),
        # output tensor with non-zero slot
        d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op346",
                     slot=1, iteration=2, device_id=0, root_graph_id=0, is_parameter=False),
    ]

    tensor_data = backend.read_tensors(tensor_info)

    print_read_tensors(tensor_info, tensor_data)
|
||||
|
||||
|
||||
def print_read_tensors(tensor_info, tensor_data):
    """Print the attributes of each requested tensor and its returned data.

    Args:
        tensor_info (list): TensorInfo objects that were passed to read_tensors().
        tensor_data (list): TensorData objects returned by read_tensors(), in
            the same order as tensor_info.
    """
    # Iterate the parallel lists together instead of re-indexing both on
    # every access; x is kept only for the 1-based labels in the output.
    for x, (info, data) in enumerate(zip(tensor_info, tensor_data)):
        print("-----------------------------------------------------------")
        print("tensor_info_" + str(x+1) + " attributes:")
        print("node name = ", info.node_name)
        print("slot = ", info.slot)
        print("iteration = ", info.iteration)
        print("device_id = ", info.device_id)
        print("root_graph_id = ", info.root_graph_id)
        print("is_parameter = ", info.is_parameter)
        print()
        print("tensor_data_" + str(x+1) + " attributes:")
        print("data (printed in uint8) = ", np.frombuffer(
            data.data_ptr, np.uint8, data.data_size))
        py_byte_size = len(data.data_ptr)
        c_byte_size = data.data_size
        if c_byte_size != py_byte_size:
            # A mismatch means the C++ layer reported a size different from
            # the byte string it actually handed to Python.
            print("The python byte size of ", py_byte_size,
                  " does not match the C++ byte size of ", c_byte_size)
        print("size in bytes = ", data.data_size)
        print("debugger dtype = ", data.dtype)
        print("shape = ", data.shape)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -0,0 +1,33 @@
|
|||
-----------------------------------------------------------
|
||||
watchpoint_hit for test_1 attributes:
|
||||
name = Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op168
|
||||
slot = 0
|
||||
condition = 6
|
||||
watchpoint_id = 1
|
||||
parameter 0 name = param
|
||||
parameter 0 disabled = False
|
||||
parameter 0 value = 0.0
|
||||
parameter 0 hit = True
|
||||
parameter 0 actual_value = -0.14013671875
|
||||
error code = 0
|
||||
device_id = 0
|
||||
root_graph_id = 0
|
||||
-----------------------------------------------------------
|
||||
watchpoint_hit for test_4 attributes:
|
||||
name = Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/Parameter[6]_11/fc3.bias
|
||||
slot = 0
|
||||
condition = 18
|
||||
watchpoint_id = 3
|
||||
parameter 0 name = abs_mean_update_ratio_gt
|
||||
parameter 0 disabled = False
|
||||
parameter 0 value = 0.0
|
||||
parameter 0 hit = True
|
||||
parameter 0 actual_value = 0.5243796973599475
|
||||
parameter 1 name = epsilon
|
||||
parameter 1 disabled = True
|
||||
parameter 1 value = 0.0
|
||||
parameter 1 hit = False
|
||||
parameter 1 actual_value = 0.0
|
||||
error code = 0
|
||||
device_id = 0
|
||||
root_graph_id = 0
|
|
@ -0,0 +1,109 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""
|
||||
Watchpoints test script for offline debugger APIs.
|
||||
"""
|
||||
|
||||
import mindspore.offline_debug.dbg_services as d
|
||||
|
||||
|
||||
def main():
    """Exercise the offline debugger watchpoint APIs: add, check, and remove."""
    backend = d.DbgServices(
        dump_file_path="/opt/nvme2n1/j00455527/dumps/sync_trans_false/032421/alexnet")

    _ = backend.initialize(
        net_name="Network Name goes here!", is_sync_mode=True)

    # NOTES:
    # -> watch_condition=6 is MIN_LT
    # -> watch_condition=18 is CHANGE_TOO_LARGE

    # test 1: watchpoint set and hit (watch_condition=6)
    min_lt_param = d.Parameter(name="param", disabled=False, value=0.0)
    _ = backend.add_watchpoint(
        watchpoint_id=1, watch_condition=6,
        check_node_list={
            "Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op168": {
                "device_id": [0], "root_graph_id": [0], "is_parameter": False}},
        parameter_list=[min_lt_param])

    hits_test_1 = backend.check_watchpoints(iteration=2)
    if len(hits_test_1) != 1:
        print("ERROR -> test 1: watchpoint set but not hit just once")
    print_watchpoint_hits(hits_test_1, 1)

    # test 2: watchpoint remove and ensure it's not hit
    _ = backend.remove_watchpoint(watchpoint_id=1)
    hits_test_2 = backend.check_watchpoints(iteration=2)
    if hits_test_2:
        print("ERROR -> test 2: watchpoint removed but hit")

    # test 3: watchpoint set and not hit, then remove
    never_hit_param = d.Parameter(name="param", disabled=False, value=-1000.0)
    _ = backend.add_watchpoint(
        watchpoint_id=2, watch_condition=6,
        check_node_list={
            "Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op308": {
                "device_id": [0], "root_graph_id": [0], "is_parameter": False}},
        parameter_list=[never_hit_param])

    hits_test_3 = backend.check_watchpoints(iteration=2)
    if hits_test_3:
        print("ERROR -> test 3: watchpoint set but not supposed to be hit")
    _ = backend.remove_watchpoint(watchpoint_id=2)

    # test 4: weight change watchpoint set and hit
    ratio_param = d.Parameter(
        name="abs_mean_update_ratio_gt", disabled=False, value=0.0)
    epsilon_param = d.Parameter(name="epsilon", disabled=True, value=0.0)
    _ = backend.add_watchpoint(
        watchpoint_id=3, watch_condition=18,
        check_node_list={
            "Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/Parameter[6]_11/fc3.bias": {
                "device_id": [0], "root_graph_id": [0], "is_parameter": True}},
        parameter_list=[ratio_param, epsilon_param])

    hits_test_4 = backend.check_watchpoints(iteration=3)
    if len(hits_test_4) != 1:
        print("ERROR -> test 4: watchpoint weight change set but not hit just once")
    print_watchpoint_hits(hits_test_4, 4)
|
||||
|
||||
|
||||
def print_watchpoint_hits(watchpoint_hits, test_id):
    """Print the attributes of every watchpoint hit.

    Args:
        watchpoint_hits (list): WatchpointHit objects from check_watchpoints().
        test_id (int): label printed in the header line for each hit.
    """
    # Iterate the hits directly; the original index was only used to
    # re-subscript the list it was iterating.
    for hit in watchpoint_hits:
        print("-----------------------------------------------------------")
        print("watchpoint_hit for test_%u attributes:" % test_id)
        print("name = ", hit.name)
        print("slot = ", hit.slot)
        print("condition = ", hit.condition)
        print("watchpoint_id = ", hit.watchpoint_id)
        for p, param in enumerate(hit.parameters):
            print("parameter ", p, " name = ", param.name)
            print("parameter ", p, " disabled = ", param.disabled)
            print("parameter ", p, " value = ", param.value)
            print("parameter ", p, " hit = ", param.hit)
            print("parameter ", p, " actual_value = ", param.actual_value)
        print("error code = ", hit.error_code)
        print("device_id = ", hit.device_id)
        print("root_graph_id = ", hit.root_graph_id)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -0,0 +1,70 @@
|
|||
-----------------------------------------------------------
|
||||
tensor_info_1 attributes:
|
||||
node name = Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias
|
||||
slot = 0
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 0
|
||||
is_parameter = True
|
||||
|
||||
tensor_data_1 attributes:
|
||||
data (printed in uint8) = [230 208 10 52 104 34 252 52 4 231 144 52 188 150 64 180 88 236
|
||||
15 180 254 135 180 51 131 226 147 52 88 202 62 53 2 43 55 53
|
||||
231 29 87 180 220 249 30 180 157 17 177 180 81 107 140 181 8 95
|
||||
192 180 89 134 112 180 96 238 90 178 156 196 212 180 206 25 15 181
|
||||
212 154 6 180 91 211 116 52 191 14 140 51 128 106 124 53 28 158
|
||||
70 181 182 21 251 50 100 204 157 179 88 202 42 180 7 95 8 53
|
||||
128 251 238 52 241 133 241 52 111 86 157 179 48 221 148 180 200 7
|
||||
141 180 236 226 182 51 190 82 158 180 140 108 179 180 195 134 215 179
|
||||
103 213 39 179 89 168 149 180 42 58 58 180 64 53 62 179 250 126
|
||||
158 52 38 83 117 52 0 0 136 180 136 133 122 51 110 18 131 179
|
||||
238 13 94 51 102 136 15 181 134 90 227 180 16 11 117 180 35 74
|
||||
163 52 105 0 87 181 112 18 131 50 226 233 67 181 217 172 10 52
|
||||
206 25 217 52 208 213 22 52 146 203 87 180 74 46 207 52 178 191
|
||||
4 180 100 93 216 52 119 190 171 180 223 2 5 181 128 72 207 179
|
||||
58 146 11 179 224 79 137 52 143 228 154 180 246 219 215 179 14 79
|
||||
195 52 126 29 64 52 132 192 42 51 94 220 86 52 94 109 1 181
|
||||
72 37 117 178 110 197 94 180 160 94 153 179 118 224 80 181 156 17
|
||||
37 50 120 156 162 53 26 115 135 180 228 20 29 53 145 126 147 52
|
||||
99 16 48 180 211 188 199 180 52 51 99 180 93 254 227 52 152 126
|
||||
123 49 6 18 16 181 5 163 130 51 27 158 98 53 134 235 189 52
|
||||
119 45 9 180 130 115 110 52 158 128 162 52 232 251 197 180 178 46
|
||||
158 179 57 214 157 52 172 207 161 180 208 0 222 49 242 99 32 53
|
||||
20 174 135 50 247 117 176 52 194 57 43 180 140 108 135 51 243 65
|
||||
175 51 187 73 156 51 63 232 217 50 180 234 115 52 194 168 148 52
|
||||
27 192 183 180 45 178 157 52 125 208 17 53 236 192 65 53 190 193
|
||||
7 53 254 246 57 53 3 43 199 51 64 164 215 180 220 104 240 51
|
||||
23 72 24 180 68 173 9 51 72 114 29 53 105 0 57 181 188 150
|
||||
8 53 229 97 131 53 0 34 189 51 163 146 74 53 31 244 204 51
|
||||
86 193 220 180 156 51 146 179]
|
||||
size in bytes = 512
|
||||
debugger dtype = 11
|
||||
shape = [128]
|
||||
-----------------------------------------------------------
|
||||
tensor_info_2 attributes:
|
||||
node name = Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op171
|
||||
slot = 0
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 0
|
||||
is_parameter = False
|
||||
|
||||
tensor_data_2 attributes:
|
||||
data (printed in uint8) = [ 99 26 69 ... 154 218 164]
|
||||
size in bytes = 2076672
|
||||
debugger dtype = 10
|
||||
shape = [32, 192, 13, 13]
|
||||
-----------------------------------------------------------
|
||||
tensor_info_3 attributes:
|
||||
node name = Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op353
|
||||
slot = 1
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 0
|
||||
is_parameter = False
|
||||
|
||||
tensor_data_3 attributes:
|
||||
data (printed in uint8) = [19 17 27 ... 94 42 90]
|
||||
size in bytes = 129792
|
||||
debugger dtype = 6
|
||||
shape = [32, 12, 13, 13, 2]
|
|
@ -0,0 +1,74 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""
|
||||
Read tensor test script for offline debugger APIs.
|
||||
"""
|
||||
|
||||
import mindspore.offline_debug.dbg_services as d
|
||||
import numpy as np
|
||||
|
||||
|
||||
def main():
    """Drive the offline debugger read-tensor flow against a sync-mode dump."""
    # Point the offline debugger at a pre-generated (sync) dump directory.
    backend = d.DbgServices(
        dump_file_path="/opt/nvme2n1/j00455527/dumps/sync_trans_true/032421/alexnet")

    _ = backend.initialize(
        net_name="Network Name goes here!", is_sync_mode=True)

    # Three representative reads: a parameter, a zero-slot op output, and a
    # non-zero-slot op output.
    tensor_info = [
        # parameter
        d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias",
                     slot=0, iteration=2, device_id=0, root_graph_id=0, is_parameter=True),
        # output tensor with zero slot
        d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op171",
                     slot=0, iteration=2, device_id=0, root_graph_id=0, is_parameter=False),
        # output tensor with non-zero slot
        d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op353",
                     slot=1, iteration=2, device_id=0, root_graph_id=0, is_parameter=False),
    ]

    tensor_data = backend.read_tensors(tensor_info)

    print_read_tensors(tensor_info, tensor_data)
|
||||
|
||||
|
||||
def print_read_tensors(tensor_info, tensor_data):
    """Print the attributes of each requested tensor and its returned data.

    Args:
        tensor_info (list): TensorInfo objects that were passed to read_tensors().
        tensor_data (list): TensorData objects returned by read_tensors(), in
            the same order as tensor_info.
    """
    # Iterate the parallel lists together instead of re-indexing both on
    # every access; x is kept only for the 1-based labels in the output.
    for x, (info, data) in enumerate(zip(tensor_info, tensor_data)):
        print("-----------------------------------------------------------")
        print("tensor_info_" + str(x+1) + " attributes:")
        print("node name = ", info.node_name)
        print("slot = ", info.slot)
        print("iteration = ", info.iteration)
        print("device_id = ", info.device_id)
        print("root_graph_id = ", info.root_graph_id)
        print("is_parameter = ", info.is_parameter)
        print()
        print("tensor_data_" + str(x+1) + " attributes:")
        print("data (printed in uint8) = ", np.frombuffer(
            data.data_ptr, np.uint8, data.data_size))
        py_byte_size = len(data.data_ptr)
        c_byte_size = data.data_size
        if c_byte_size != py_byte_size:
            # A mismatch means the C++ layer reported a size different from
            # the byte string it actually handed to Python.
            print("The python byte size of ", py_byte_size,
                  " does not match the C++ byte size of ", c_byte_size)
        print("size in bytes = ", data.data_size)
        print("debugger dtype = ", data.dtype)
        print("shape = ", data.shape)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -0,0 +1,261 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "debugger/offline_debug/dbg_services.h"

#include <algorithm>
#include <cstdlib>
|
||||
|
||||
DbgServices::DbgServices(bool verbose) {
  // Verbose logging is enabled either explicitly through the constructor
  // argument or implicitly by defining the OFFLINE_DBG_LOG environment
  // variable (its value is not inspected, only its presence).
  DbgLogger::verbose = verbose;
  const char *dbg_log_path = std::getenv("OFFLINE_DBG_LOG");
  if (dbg_log_path != nullptr) {
    DbgLogger::verbose = true;
  }
  debug_services = new DebugServices();
}
|
||||
|
||||
// Copy constructor: each DbgServices owns its own DebugServices backend, so
// the source's backend is deep-copied (member initializer list rather than
// default-construct-then-assign in the body).
DbgServices::DbgServices(const DbgServices &other)
    : debug_services(new DebugServices(*other.debug_services)) {
  MS_LOG(INFO) << "cpp DbgServices object is created via copy";
}
|
||||
|
||||
DbgServices &DbgServices::operator=(const DbgServices &other) {
  MS_LOG(INFO) << "cpp DbgServices object is being assigned a different state";
  // Guard clause: self-assignment must not delete the backend we are about
  // to copy from.
  if (this == &other) {
    return *this;
  }
  delete debug_services;
  debug_services = new DebugServices(*other.debug_services);
  return *this;
}
|
||||
|
||||
DbgServices::~DbgServices() {
  MS_LOG(INFO) << "cpp DbgServices object is deleted";
  // The backend is exclusively owned by this object, so it is safe to free.
  delete debug_services;
}
|
||||
|
||||
std::string DbgServices::GetVersion() {
  MS_LOG(INFO) << "get version is called";
  // NOTE(review): hard-coded version string — presumably must track the
  // package release; confirm when bumping versions.
  constexpr const char *kVersion = "1.2.0";
  return kVersion;
}
|
||||
|
||||
// Record the session configuration (network name, dump location, sync/async
// mode) on the backend. The current implementation cannot fail; it always
// returns 0 to signal success to the Python caller.
int32_t DbgServices::Initialize(std::string net_name, std::string dump_folder_path, bool is_sync_mode) {
  MS_LOG(INFO) << "cpp DbgServices initialize network name " << net_name;
  MS_LOG(INFO) << "cpp DbgServices initialize dump folder path " << dump_folder_path;
  MS_LOG(INFO) << "cpp DbgServices initialize sync mode " << is_sync_mode;
  debug_services->SetNetName(net_name);
  debug_services->SetDumpDir(dump_folder_path);
  debug_services->SetSyncMode(is_sync_mode);
  return 0;
}
|
||||
|
||||
int32_t DbgServices::AddWatchpoint(
|
||||
unsigned int id, unsigned int watch_condition,
|
||||
std::map<std::string, std::map<std::string, std::variant<bool, std::vector<std::string>>>> check_nodes,
|
||||
std::vector<parameter_t> parameter_list) {
|
||||
MS_LOG(INFO) << "cpp start";
|
||||
|
||||
MS_LOG(INFO) << "cpp DbgServices AddWatchpoint id " << id;
|
||||
MS_LOG(INFO) << "cpp DbgServices AddWatchpoint watch_condition " << watch_condition;
|
||||
for (auto const &node : check_nodes) {
|
||||
MS_LOG(INFO) << "cpp DbgServices AddWatchpoint name " << node.first;
|
||||
auto attr_map = node.second;
|
||||
|
||||
bool is_parameter = std::get<bool>(attr_map["is_parameter"]);
|
||||
MS_LOG(INFO) << "cpp DbgServices AddWatchpoint is_parameter " << is_parameter;
|
||||
|
||||
// std::vector<uint32_t> device_id = std::get<std::vector<uint32_t>>(attr_map["device_id"]);
|
||||
std::vector<std::string> device_id_str = std::get<std::vector<std::string>>(attr_map["device_id"]);
|
||||
std::vector<std::uint32_t> device_id;
|
||||
std::transform(device_id_str.begin(), device_id_str.end(), std::back_inserter(device_id),
|
||||
[](std::string &id_str) -> std::uint32_t { return static_cast<uint32_t>(std::stoul(id_str)); });
|
||||
MS_LOG(INFO) << "cpp DbgServices AddWatchpoint device_id ";
|
||||
for (auto const &i : device_id) {
|
||||
MS_LOG(INFO) << i << " ";
|
||||
}
|
||||
|
||||
// std::vector<uint32_t> root_graph_id = std::get<std::vector<uint32_t>>(attr_map["root_graph_id"]);
|
||||
std::vector<std::string> root_graph_id_str = std::get<std::vector<std::string>>(attr_map["root_graph_id"]);
|
||||
std::vector<std::uint32_t> root_graph_id;
|
||||
std::transform(
|
||||
root_graph_id_str.begin(), root_graph_id_str.end(), std::back_inserter(root_graph_id),
|
||||
[](std::string &graph_str) -> std::uint32_t { return static_cast<uint32_t>(std::stoul(graph_str)); });
|
||||
MS_LOG(INFO) << "cpp DbgServices AddWatchpoint root_graph_id";
|
||||
for (auto const &j : root_graph_id) {
|
||||
MS_LOG(INFO) << j << " ";
|
||||
}
|
||||
}
|
||||
|
||||
for (auto const ¶meter : parameter_list) {
|
||||
MS_LOG(INFO) << "cpp DbgServices AddWatchpoint parameter name " << parameter.name;
|
||||
MS_LOG(INFO) << "cpp DbgServices AddWatchpoint parameter disabled " << parameter.disabled;
|
||||
MS_LOG(INFO) << "cpp DbgServices AddWatchpoint parameter value " << parameter.value;
|
||||
MS_LOG(INFO) << "cpp DbgServices AddWatchpoint parameter hit " << parameter.hit;
|
||||
MS_LOG(INFO) << "cpp DbgServices AddWatchpoint parameter actual_value " << parameter.actual_value;
|
||||
}
|
||||
|
||||
std::vector<std::tuple<std::string, bool>> check_node_list;
|
||||
std::vector<std::tuple<std::string, std::vector<uint32_t>>> check_node_device_list;
|
||||
std::vector<std::tuple<std::string, std::vector<uint32_t>>> check_node_graph_list;
|
||||
std::vector<DebugServices::parameter_t> parameter_list_backend;
|
||||
|
||||
std::transform(check_nodes.begin(), check_nodes.end(), std::back_inserter(check_node_list),
|
||||
[](auto &node) -> std::tuple<std::string, bool> {
|
||||
auto attr_map = node.second;
|
||||
return std::make_tuple(node.first, std::get<bool>(attr_map["is_parameter"]));
|
||||
});
|
||||
|
||||
std::transform(check_nodes.begin(), check_nodes.end(), std::back_inserter(check_node_device_list),
|
||||
[](auto &node) -> std::tuple<std::string, std::vector<uint32_t>> {
|
||||
auto attr_map = node.second;
|
||||
std::vector<std::string> device_id_str = std::get<std::vector<std::string>>(attr_map["device_id"]);
|
||||
std::vector<std::uint32_t> device_id;
|
||||
std::transform(
|
||||
device_id_str.begin(), device_id_str.end(), std::back_inserter(device_id),
|
||||
[](std::string &id_str) -> std::uint32_t { return static_cast<uint32_t>(std::stoul(id_str)); });
|
||||
return std::make_tuple(node.first, device_id);
|
||||
});
|
||||
|
||||
std::transform(
|
||||
check_nodes.begin(), check_nodes.end(), std::back_inserter(check_node_graph_list),
|
||||
[](auto &node) -> std::tuple<std::string, std::vector<uint32_t>> {
|
||||
auto attr_map = node.second;
|
||||
std::vector<std::string> root_graph_id_str = std::get<std::vector<std::string>>(attr_map["root_graph_id"]);
|
||||
std::vector<std::uint32_t> root_graph_id;
|
||||
std::transform(
|
||||
root_graph_id_str.begin(), root_graph_id_str.end(), std::back_inserter(root_graph_id),
|
||||
[](std::string &graph_str) -> std::uint32_t { return static_cast<uint32_t>(std::stoul(graph_str)); });
|
||||
return std::make_tuple(node.first, root_graph_id);
|
||||
});
|
||||
|
||||
std::transform(
|
||||
parameter_list.begin(), parameter_list.end(), std::back_inserter(parameter_list_backend),
|
||||
[](const parameter_t ¶meter) -> DebugServices::parameter_t {
|
||||
return DebugServices::parameter_t{parameter.name, parameter.disabled, parameter.value, parameter.hit};
|
||||
});
|
||||
|
||||
debug_services->AddWatchpoint(id, watch_condition, 0, check_node_list, parameter_list_backend,
|
||||
&check_node_device_list, &check_node_graph_list);
|
||||
MS_LOG(INFO) << "cpp end";
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Remove a previously registered watchpoint by id; always returns 0.
int32_t DbgServices::RemoveWatchpoint(unsigned int id) {
  MS_LOG(INFO) << "cpp DbgServices RemoveWatchpoint id " << id;
  debug_services->RemoveWatchpoint(id);
  return 0;
}
|
||||
|
||||
std::vector<watchpoint_hit_t> DbgServices::CheckWatchpoints(unsigned int iteration) {
|
||||
MS_LOG(INFO) << "cpp DbgServices CheckWatchpoint iteration " << iteration;
|
||||
|
||||
std::vector<std::string> name;
|
||||
std::vector<std::string> slot;
|
||||
std::vector<int> condition;
|
||||
std::vector<unsigned int> watchpoint_id;
|
||||
std::vector<std::string> overflow_ops;
|
||||
std::vector<std::vector<DebugServices::parameter_t>> parameters;
|
||||
std::vector<int32_t> error_codes;
|
||||
std::vector<unsigned int> device_id;
|
||||
std::vector<unsigned int> root_graph_id;
|
||||
// #ifdef ENABLE_D
|
||||
// overflow_ops = CheckOpOverflow();
|
||||
// #endif
|
||||
|
||||
std::vector<std::shared_ptr<TensorData>> tensor_list;
|
||||
tensor_list = debug_services->ReadNeededDumpedTensors(iteration);
|
||||
|
||||
debug_services->CheckWatchpoints(&name, &slot, &condition, &watchpoint_id, ¶meters, &error_codes, overflow_ops,
|
||||
&tensor_list, false, true, true, &device_id, &root_graph_id);
|
||||
|
||||
std::vector<watchpoint_hit_t> hits;
|
||||
for (unsigned int i = 0; i < name.size(); i++) {
|
||||
std::vector<DebugServices::parameter_t> ¶meter = parameters[i];
|
||||
std::vector<parameter_t> api_parameter_vector;
|
||||
for (const auto &p : parameter) {
|
||||
parameter_t api_parameter(p.name, p.disabled, p.value, p.hit, p.actual_value);
|
||||
api_parameter_vector.push_back(api_parameter);
|
||||
}
|
||||
watchpoint_hit_t hit(name[i], std::stoi(slot[i]), condition[i], watchpoint_id[i], api_parameter_vector,
|
||||
error_codes[i], device_id[i], root_graph_id[i]);
|
||||
|
||||
MS_LOG(INFO) << "cpp DbgServices watchpoint_hit_t name " << hit.name;
|
||||
MS_LOG(INFO) << "cpp DbgServices watchpoint_hit_t slot " << hit.slot;
|
||||
MS_LOG(INFO) << "cpp DbgServices watchpoint_hit_t watchpoint_id " << hit.watchpoint_id;
|
||||
MS_LOG(INFO) << "cpp DbgServices watchpoint_hit_t error_code " << hit.error_code;
|
||||
MS_LOG(INFO) << "cpp DbgServices watchpoint_hit_t device_id " << hit.device_id;
|
||||
MS_LOG(INFO) << "cpp DbgServices watchpoint_hit_t root_graph_id " << hit.root_graph_id;
|
||||
|
||||
for (auto const ¶meter_i : api_parameter_vector) {
|
||||
MS_LOG(INFO) << "cpp DbgServices watchpoint_hit_t parameter name " << parameter_i.name;
|
||||
MS_LOG(INFO) << "cpp DbgServices watchpoint_hit_t parameter disabled " << parameter_i.disabled;
|
||||
MS_LOG(INFO) << "cpp DbgServices watchpoint_hit_t parameter value " << parameter_i.value;
|
||||
MS_LOG(INFO) << "cpp DbgServices watchpoint_hit_t parameter hit " << parameter_i.hit;
|
||||
MS_LOG(INFO) << "cpp DbgServices watchpoint_hit_t parameter actual_value " << parameter_i.actual_value;
|
||||
}
|
||||
|
||||
hits.push_back(hit);
|
||||
}
|
||||
return hits;
|
||||
}
|
||||
|
||||
std::string GetTensorFullName(tensor_info_t info) {
|
||||
std::string node_name = info.node_name;
|
||||
if (info.is_parameter) {
|
||||
// scopes in node name are separated by '/'
|
||||
// use the name without scope if truncate is true
|
||||
std::size_t found = node_name.find_last_of("/");
|
||||
node_name = node_name.substr(found + 1);
|
||||
}
|
||||
return node_name + ":" + std::to_string(info.slot);
|
||||
}
|
||||
|
||||
unsigned int GetTensorDeviceId(tensor_info_t info) { return info.device_id; }
|
||||
|
||||
unsigned int GetTensorRootGraphId(tensor_info_t info) { return info.root_graph_id; }
|
||||
|
||||
unsigned int GetTensorIteration(tensor_info_t info) { return info.iteration; }
|
||||
|
||||
unsigned int GetTensorSlot(tensor_info_t info) { return info.slot; }
|
||||
|
||||
std::vector<tensor_data_t> DbgServices::ReadTensors(std::vector<tensor_info_t> info) {
|
||||
for (auto i : info) {
|
||||
MS_LOG(INFO) << "cpp DbgServices ReadTensor info name " << i.node_name << ", slot " << i.slot << ", iteration "
|
||||
<< i.iteration << ", device_id " << i.device_id << ", root_graph_id " << i.root_graph_id;
|
||||
}
|
||||
std::vector<std::string> backend_name;
|
||||
std::vector<unsigned int> device_id;
|
||||
std::vector<unsigned int> root_graph_id;
|
||||
std::vector<unsigned int> iteration;
|
||||
std::vector<size_t> slot;
|
||||
std::vector<std::shared_ptr<TensorData>> result_list;
|
||||
std::vector<tensor_data_t> tensors_read;
|
||||
|
||||
std::transform(info.begin(), info.end(), std::back_inserter(backend_name), GetTensorFullName);
|
||||
std::transform(info.begin(), info.end(), std::back_inserter(slot), GetTensorSlot);
|
||||
std::transform(info.begin(), info.end(), std::back_inserter(device_id), GetTensorDeviceId);
|
||||
std::transform(info.begin(), info.end(), std::back_inserter(root_graph_id), GetTensorRootGraphId);
|
||||
std::transform(info.begin(), info.end(), std::back_inserter(iteration), GetTensorIteration);
|
||||
|
||||
MS_LOG(INFO) << "cpp before";
|
||||
debug_services->ReadDumpedTensor(backend_name, slot, device_id, iteration, root_graph_id, &result_list);
|
||||
MS_LOG(INFO) << "cpp after";
|
||||
|
||||
for (auto result : result_list) {
|
||||
tensor_data_t tensor_data_item(result->GetDataPtr(), result->GetByteSize(), result->GetType(), result->GetShape());
|
||||
tensors_read.push_back(tensor_data_item);
|
||||
}
|
||||
MS_LOG(INFO) << "cpp end";
|
||||
return tensors_read;
|
||||
}
|
|
@ -0,0 +1,149 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef DEBUG_DBG_SERVICES_H_
|
||||
#define DEBUG_DBG_SERVICES_H_
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <tuple>
|
||||
#include <iostream>
|
||||
#include <variant>
|
||||
#include "pybind11/pybind11.h"
|
||||
#include "pybind11/stl.h"
|
||||
#include "pybind11/stl_bind.h"
|
||||
|
||||
#include "debug/debug_services.h"
|
||||
namespace py = pybind11;
|
||||
|
||||
// Python-facing watchpoint parameter; mirrors DebugServices::parameter_t
// with an extra actual_value field reported back when the watchpoint hits.
typedef struct parameter {
  parameter(const std::string &name, bool disabled, double value, bool hit, double actual_value)
      : name(name), disabled(disabled), value(value), hit(hit), actual_value(actual_value) {}
  // Accessors (top-level const on a by-value return is meaningless and has
  // been dropped; see clang-tidy readability-const-return-type).
  std::string get_name() const { return name; }
  bool get_disabled() const { return disabled; }
  double get_value() const { return value; }
  bool get_hit() const { return hit; }
  double get_actual_value() const { return actual_value; }
  std::string name;     // parameter name, e.g. "param" or "epsilon"
  bool disabled;        // true if this parameter is not checked
  double value;         // threshold configured by the user
  bool hit;             // true if this parameter triggered the watchpoint
  double actual_value;  // value observed when the watchpoint was checked
} parameter_t;
|
||||
|
||||
typedef struct watchpoint_hit {
|
||||
watchpoint_hit(const std::string &name, uint32_t slot, int condition, uint32_t watchpoint_id,
|
||||
const std::vector<parameter_t> ¶meters, int32_t error_code, uint32_t device_id,
|
||||
uint32_t root_graph_id)
|
||||
: name(name),
|
||||
slot(slot),
|
||||
condition(condition),
|
||||
watchpoint_id(watchpoint_id),
|
||||
parameters(parameters),
|
||||
error_code(error_code),
|
||||
device_id(device_id),
|
||||
root_graph_id(root_graph_id) {}
|
||||
const std::string get_name() const { return name; }
|
||||
const uint32_t get_slot() const { return slot; }
|
||||
const int get_condition() const { return condition; }
|
||||
const uint32_t get_watchpoint_id() const { return watchpoint_id; }
|
||||
const std::vector<parameter_t> get_parameters() const { return parameters; }
|
||||
const int32_t get_error_code() const { return error_code; }
|
||||
const uint32_t get_device_id() const { return device_id; }
|
||||
const uint32_t get_root_graph_id() const { return root_graph_id; }
|
||||
std::string name;
|
||||
uint32_t slot;
|
||||
int condition;
|
||||
uint32_t watchpoint_id;
|
||||
std::vector<parameter_t> parameters;
|
||||
int32_t error_code;
|
||||
uint32_t device_id;
|
||||
uint32_t root_graph_id;
|
||||
} watchpoint_hit_t;
|
||||
|
||||
// Python-facing description of a tensor to read from the dump
// (see DbgServices::ReadTensors).
typedef struct tensor_info {
  tensor_info(const std::string &node_name, uint32_t slot, uint32_t iteration, uint32_t device_id,
              uint32_t root_graph_id, bool is_parameter)
      : node_name(node_name),
        slot(slot),
        iteration(iteration),
        device_id(device_id),
        root_graph_id(root_graph_id),
        is_parameter(is_parameter) {}
  // Accessors (top-level const on a by-value return is meaningless and has
  // been dropped; see clang-tidy readability-const-return-type).
  std::string get_node_name() const { return node_name; }
  uint32_t get_slot() const { return slot; }
  uint32_t get_iteration() const { return iteration; }
  uint32_t get_device_id() const { return device_id; }
  uint32_t get_root_graph_id() const { return root_graph_id; }
  bool get_is_parameter() const { return is_parameter; }
  std::string node_name;   // fully scoped node name
  uint32_t slot;           // output slot of the node
  uint32_t iteration;      // training iteration the dump belongs to
  uint32_t device_id;
  uint32_t root_graph_id;
  bool is_parameter;       // true if the tensor is a network parameter
} tensor_info_t;
|
||||
|
||||
typedef struct tensor_data {
|
||||
tensor_data(char *data_ptr, uint64_t data_size, int dtype, const std::vector<int64_t> &shape)
|
||||
: data_size(data_size), dtype(dtype), shape(shape) {
|
||||
if (data_ptr != NULL) {
|
||||
this->data_ptr = py::bytes(data_ptr, data_size);
|
||||
} else {
|
||||
this->data_ptr = py::bytes();
|
||||
}
|
||||
}
|
||||
const py::bytes get_data_ptr() const { return data_ptr; }
|
||||
const uint64_t get_data_size() const { return data_size; }
|
||||
const int get_dtype() const { return dtype; }
|
||||
const std::vector<int64_t> &get_shape() const { return shape; }
|
||||
py::bytes data_ptr;
|
||||
uint64_t data_size;
|
||||
int dtype;
|
||||
std::vector<int64_t> shape;
|
||||
} tensor_data_t;
|
||||
|
||||
// Facade over the in-process DebugServices engine for the offline debugger.
// Only declarations live here; definitions are elsewhere (dbg_services.cc),
// so lifetime/copy semantics below are inferred from the declared special
// members — TODO(review): confirm against the implementation.
class DbgServices {
 private:
  DebugServices *debug_services;  // backend engine; presumably owned (raw pointer, managed by ctor/dtor) — verify in dbg_services.cc

 public:
  // verbose enables diagnostic logging in the backend.
  explicit DbgServices(bool verbose = false);

  DbgServices(const DbgServices &other);

  DbgServices &operator=(const DbgServices &other);

  ~DbgServices();

  // Points the service at a dump directory; is_sync_mode selects
  // synchronous vs asynchronous dump-file layout.  Returns a status code.
  int32_t Initialize(std::string net_name, std::string dump_folder_path, bool is_sync_mode);

  // Registers watchpoint `id` with condition `watch_condition` on the nodes
  // described by check_nodes (node name -> attribute map) and the given
  // check parameters.  Returns a status code.
  int32_t AddWatchpoint(
    unsigned int id, unsigned int watch_condition,
    std::map<std::string, std::map<std::string, std::variant<bool, std::vector<std::string>>>> check_nodes,
    std::vector<parameter_t> parameter_list);

  // Unregisters the watchpoint with the given id.  Returns a status code.
  int32_t RemoveWatchpoint(unsigned int id);

  // Evaluates all registered watchpoints against the dump of `iteration`
  // and returns the hits.
  std::vector<watchpoint_hit_t> CheckWatchpoints(unsigned int iteration);

  // Reads the tensors described by `info` from the dump files.
  std::vector<tensor_data_t> ReadTensors(std::vector<tensor_info_t> info);

  // Returns the backend version string.
  std::string GetVersion();
};
|
||||
|
||||
#endif // DEBUG_DBG_SERVICES_H_
|
|
@ -0,0 +1,865 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""
|
||||
The module DbgServices provides offline debugger APIs.
|
||||
"""
|
||||
|
||||
import mindspore._mindspore_offline_debug as cds
|
||||
from mi_validators import check_init, check_initialize, check_add_watchpoint, check_remove_watchpoint, check_check_watchpoints, check_read_tensors, check_initialize_done, check_tensor_info_init, check_tensor_data_init, check_watchpoint_hit_init, check_parameter_init
|
||||
|
||||
|
||||
def get_version():
    """
    Return the offline Debug Services version string.

    Returns:
        version (str): dbgServices version.

    Examples:
        >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
        >>> version = dbg_services.get_version()
    """
    backend = cds.DbgServices(False)
    return backend.GetVersion()
|
||||
|
||||
class DbgLogger:
    """
    Offline Debug Services logger.

    Calling the instance prints its arguments (as a tuple) when verbose
    logging is enabled, and is a no-op otherwise.

    Args:
        verbose (bool): whether to print logs.

    Examples:
        >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
        >>> version = dbg_services.DbgLogger(verbose=False)
    """

    def __init__(self, verbose):
        self.verbose = verbose

    def __call__(self, *logs):
        if not self.verbose:
            return
        print(logs)
|
||||
|
||||
|
||||
log = DbgLogger(False)
|
||||
|
||||
|
||||
class DbgServices():
    """
    Offline Debug Services class.

    Thin Python wrapper over the pybind11 backend (cds.DbgServices) that
    validates arguments and converts between Python wrapper objects and the
    backend's value types.

    Args:
        dump_file_path (str): directory where the dump files are saved.
        verbose (bool): whether to print logs (default: False).

    Examples:
        >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
        >>> d = dbg_services.DbgServices(dump_file_path="dump_file_path",
        >>>                              verbose=True)
    """

    @check_init
    def __init__(self, dump_file_path, verbose=False):
        log.verbose = verbose
        log("in Python __init__, file path is ", dump_file_path)
        self.dump_file_path = dump_file_path
        self.dbg_instance = cds.DbgServices(verbose)
        self.version = self.dbg_instance.GetVersion()
        self.verbose = verbose
        self.initialized = False

    @check_initialize
    def initialize(self, net_name, is_sync_mode=True):
        """
        Initialize Debug Service.

        Args:
            net_name (str): Network name.
            is_sync_mode (bool): Whether to process synchronous or asynchronous dump files mode
                                 (default: True (synchronous)).

        Returns:
            Initialized Debug Service instance.

        Examples:
            >>> d_init = d.initialize(net_name="network name", is_sync_mode=True)
        """
        log("in Python Initialize dump_file_path ", self.dump_file_path)
        self.initialized = True
        return self.dbg_instance.Initialize(net_name, self.dump_file_path, is_sync_mode)

    @check_initialize_done
    @check_add_watchpoint
    def add_watchpoint(self, watchpoint_id, watch_condition, check_node_list, parameter_list):
        """
        Adding watchpoint to Debug Service instance.

        Args:
            watchpoint_id (int): Watchpoint id.
            watch_condition (int): A representation of the condition to be checked.
            check_node_list (dict): Dictionary of node names (str) as key, mapping to
                                    device_id (list of ints), root_graph_id (list of ints)
                                    and is_parameter (bool).
            parameter_list (list): List of Parameter instances describing the values
                                   to be checked in the watchpoint.

        Returns:
            Debug Service instance with added watchpoint.

        Examples:
            >>> d_wp = d_init.add_watchpoint(watchpoint_id=1,
            >>>                              watch_condition=6,
            >>>                              check_node_list={"conv2.bias": {"device_id": [0],
            >>>                                               "root_graph_id": [0], "is_parameter": True}},
            >>>                              parameter_list=[dbg_services.Parameter(name="param",
            >>>                                                                    disabled=False,
            >>>                                                                    value=0.0)])
        """
        # Bug fix: removed a leftover author debug print of check_node_list.
        log("in Python AddWatchpoint")
        parameter_list_inst = [elem.instance for elem in parameter_list]
        return self.dbg_instance.AddWatchpoint(watchpoint_id, watch_condition, check_node_list, parameter_list_inst)

    @check_initialize_done
    @check_remove_watchpoint
    def remove_watchpoint(self, watchpoint_id):
        """
        Removing watchpoint from Debug Service instance.

        Args:
            watchpoint_id (int): Watchpoint id.

        Returns:
            Debug Service instance with removed watchpoint.

        Examples:
            >>> d_wp = d_wp.remove_watchpoint(watchpoint_id=1)
        """
        log("in Python Remove Watchpoint id ", watchpoint_id)
        return self.dbg_instance.RemoveWatchpoint(watchpoint_id)

    @check_initialize_done
    @check_check_watchpoints
    def check_watchpoints(self, iteration):
        """
        Checking watchpoints at a given iteration.

        Args:
            iteration (int): Watchpoint check iteration.

        Returns:
            Watchpoint hit list (list of WatchpointHit).

        Examples:
            >>> watchpoints = d_wp.check_watchpoints(iteration=8)
        """
        log("in Python CheckWatchpoints iteration ", iteration)
        backend_hits = self.dbg_instance.CheckWatchpoints(iteration)
        watchpoint_hit_list = []
        for hit in backend_hits:
            # Re-wrap the backend parameter objects in the Python Parameter class.
            param_list = [Parameter(param.get_name(),
                                    param.get_disabled(),
                                    param.get_value(),
                                    param.get_hit(),
                                    param.get_actual_value()) for param in hit.get_parameters()]
            watchpoint_hit_list.append(WatchpointHit(hit.get_name(),
                                                     hit.get_slot(),
                                                     hit.get_condition(),
                                                     hit.get_watchpoint_id(),
                                                     param_list,
                                                     hit.get_error_code(),
                                                     hit.get_device_id(),
                                                     hit.get_root_graph_id()))
        return watchpoint_hit_list

    @check_initialize_done
    @check_read_tensors
    def read_tensors(self, info):
        """
        Returning tensor data objects describing the requested tensors.

        Args:
            info (list): List of TensorInfo objects.

        Returns:
            TensorData list (list).

        Examples:
            >>> tensor_data_list = d_init.read_tensors([dbg_services.TensorInfo(node_name="conv2.bias",
            >>>                                                                 slot=0,
            >>>                                                                 iteration=8,
            >>>                                                                 device_id=5,
            >>>                                                                 root_graph_id=0,
            >>>                                                                 is_parameter=True)])
        """
        # Bug fix: the original also re-logged the whole `info` list once per
        # element inside the conversion loop; that duplicate log was removed.
        log("in Python ReadTensors info ", info)
        info_list_inst = [elem.instance for elem in info]
        backend_tensors = self.dbg_instance.ReadTensors(info_list_inst)
        tensor_data_list_ret = []
        for elem in backend_tensors:
            # An empty tensor has no backing buffer; substitute empty bytes.
            data_ptr = b'' if elem.get_data_size() == 0 else elem.get_data_ptr()
            tensor_data_list_ret.append(
                TensorData(data_ptr, elem.get_data_size(), elem.get_dtype(), elem.get_shape()))
        return tensor_data_list_ret
|
||||
|
||||
class TensorInfo():
    """
    Tensor Information class.

    Args:
        node_name (str): Fully qualified name of the desired node.
        slot (int): The particular output for the requested node.
        iteration (int): The desired iteration to gather tensor information.
        device_id (int): The desired device id to gather tensor information.
        root_graph_id (int): The root graph id the node belongs to.
        is_parameter (bool): Whether node is a parameter (input, constant, bias, parameter).

    Examples:
        >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
        >>> tensor_info = dbg_services.TensorInfo(node_name="conv2.bias",
        >>>                                       slot=0,
        >>>                                       iteration=8,
        >>>                                       device_id=5,
        >>>                                       root_graph_id=0,
        >>>                                       is_parameter=True)
    """

    @check_tensor_info_init
    def __init__(self, node_name, slot, iteration, device_id, root_graph_id, is_parameter):
        self.instance = cds.tensor_info(node_name, slot, iteration, device_id, root_graph_id, is_parameter)

    @property
    def node_name(self):
        """node_name of the TensorInfo instance (str)."""
        return self.instance.get_node_name()

    @property
    def slot(self):
        """slot of the TensorInfo instance (int)."""
        return self.instance.get_slot()

    @property
    def iteration(self):
        """iteration of the TensorInfo instance (int)."""
        return self.instance.get_iteration()

    @property
    def device_id(self):
        """device_id of the TensorInfo instance (int)."""
        # Bug fix: the original property body contained only the docstring and
        # always returned None (visible in the recorded expected output:
        # "device_id = None").
        return self.instance.get_device_id()

    @property
    def root_graph_id(self):
        """root_graph_id of the TensorInfo instance (int)."""
        return self.instance.get_root_graph_id()

    @property
    def is_parameter(self):
        """is_parameter of the TensorInfo instance (bool)."""
        return self.instance.get_is_parameter()
|
||||
|
||||
class TensorData():
    """
    TensorData class.

    Wraps the backend cds.tensor_data value type.

    Args:
        data_ptr (byte): Data pointer.
        data_size (int): Size of data in bytes.
        dtype (int): An encoding representing the type of TensorData.
        shape (list): Shape of tensor.

    Examples:
        >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
        >>> tensor_data = dbg_services.TensorData(data_ptr=b'\xba\xd0\xba\xd0',
        >>>                                       data_size=4,
        >>>                                       dtype=0,
        >>>                                       shape=[2, 2])
    """

    @check_tensor_data_init
    def __init__(self, data_ptr, data_size, dtype, shape):
        self.instance = cds.tensor_data(data_ptr, data_size, dtype, shape)

    @property
    def data_ptr(self):
        """data_ptr of the TensorData instance (byte)."""
        return self.instance.get_data_ptr()

    @property
    def data_size(self):
        """data_size of the TensorData instance (int)."""
        return self.instance.get_data_size()

    @property
    def dtype(self):
        """dtype of the TensorData instance (int)."""
        return self.instance.get_dtype()

    @property
    def shape(self):
        """shape of the TensorData instance (list)."""
        return self.instance.get_shape()
|
||||
|
||||
class WatchpointHit():
    """
    WatchpointHit class.

    Wraps the backend cds.watchpoint_hit value type.

    Args:
        name (str): Name of WatchpointHit instance.
        slot (int): The numerical label of an output.
        condition (int): A representation of the condition to be checked.
        watchpoint_id (int): Watchpoint id.
        parameters (list): A list of all parameters for the WatchpointHit instance;
                           parameters have to be instances of Parameter class.
        error_code (int): An explanation of certain scenarios where watchpoint could not be checked.
        device_id (int): Device id where the watchpoint is hit.
        root_graph_id (int): Root graph id where the watchpoint is hit.

    Examples:
        >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
        >>> watchpoint_hit = dbg_services.WatchpointHit(name="hit1",
        >>>                                             slot=1,
        >>>                                             condition=2,
        >>>                                             watchpoint_id=3,
        >>>                                             parameters=[param1, param2],
        >>>                                             error_code=0,
        >>>                                             device_id=1,
        >>>                                             root_graph_id=1)
    """

    @check_watchpoint_hit_init
    def __init__(self, name, slot, condition, watchpoint_id, parameters, error_code, device_id, root_graph_id):
        backend_params = [param.instance for param in parameters]
        self.instance = cds.watchpoint_hit(name, slot, condition, watchpoint_id,
                                           backend_params, error_code, device_id, root_graph_id)

    @property
    def name(self):
        """name of the WatchpointHit instance (str)."""
        return self.instance.get_name()

    @property
    def slot(self):
        """slot of the WatchpointHit instance (int)."""
        return self.instance.get_slot()

    @property
    def condition(self):
        """condition of the WatchpointHit instance (int)."""
        return self.instance.get_condition()

    @property
    def watchpoint_id(self):
        """watchpoint_id of the WatchpointHit instance (int)."""
        return self.instance.get_watchpoint_id()

    @property
    def parameters(self):
        """List of Parameter objects rebuilt from the backend instances (list)."""
        return [Parameter(param.get_name(),
                          param.get_disabled(),
                          param.get_value(),
                          param.get_hit(),
                          param.get_actual_value()) for param in self.instance.get_parameters()]

    @property
    def error_code(self):
        """error_code of the WatchpointHit instance (int)."""
        return self.instance.get_error_code()

    @property
    def device_id(self):
        """device_id of the WatchpointHit instance (int)."""
        return self.instance.get_device_id()

    @property
    def root_graph_id(self):
        """root_graph_id of the WatchpointHit instance (int)."""
        return self.instance.get_root_graph_id()
|
||||
|
||||
class Parameter():
    """
    Parameter class.

    Wraps the backend cds.parameter value type describing one check
    parameter of a watchpoint.

    Args:
        name (str): Name of the parameter.
        disabled (bool): Whether parameter is used in backend.
        value (float): Threshold value of the parameter.
        hit (bool): Whether this parameter triggered watchpoint (default is False).
        actual_value (float): Actual value of the parameter (default is 0.0).

    Examples:
        >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
        >>> parameter = dbg_services.Parameter(name="param",
        >>>                                    disabled=False,
        >>>                                    value=0.0,
        >>>                                    hit=False,
        >>>                                    actual_value=0.0)
    """

    @check_parameter_init
    def __init__(self, name, disabled, value, hit=False, actual_value=0.0):
        self.instance = cds.parameter(name, disabled, value, hit, actual_value)

    @property
    def name(self):
        """
        Function to receive Parameter name.

        Returns:
            name of Parameter instance (str).

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> parameter = dbg_services.Parameter(name="param",
            >>>                                    disabled=False,
            >>>                                    value=0.0,
            >>>                                    hit=False,
            >>>                                    actual_value=0.0)
            >>> name = parameter.name
        """

        return self.instance.get_name()

    @property
    def disabled(self):
        """
        Function to receive Parameter disabled value.

        Returns:
            disabled of Parameter instance (bool).

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> parameter = dbg_services.Parameter(name="param",
            >>>                                    disabled=False,
            >>>                                    value=0.0,
            >>>                                    hit=False,
            >>>                                    actual_value=0.0)
            >>> disabled = parameter.disabled
        """

        return self.instance.get_disabled()

    @property
    def value(self):
        """
        Function to receive Parameter value.

        Returns:
            value of Parameter instance (float).

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> parameter = dbg_services.Parameter(name="param",
            >>>                                    disabled=False,
            >>>                                    value=0.0,
            >>>                                    hit=False,
            >>>                                    actual_value=0.0)
            >>> value = parameter.value
        """

        return self.instance.get_value()

    @property
    def hit(self):
        """
        Function to receive Parameter hit value.

        Returns:
            hit of Parameter instance (bool).

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> parameter = dbg_services.Parameter(name="param",
            >>>                                    disabled=False,
            >>>                                    value=0.0,
            >>>                                    hit=False,
            >>>                                    actual_value=0.0)
            >>> hit = parameter.hit
        """

        return self.instance.get_hit()

    @property
    def actual_value(self):
        """
        Function to receive Parameter actual_value value.

        Returns:
            actual_value of Parameter instance (float).

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> parameter = dbg_services.Parameter(name="param",
            >>>                                    disabled=False,
            >>>                                    value=0.0,
            >>>                                    hit=False,
            >>>                                    actual_value=0.0)
            >>> actual_value = parameter.actual_value
        """

        return self.instance.get_actual_value()
|
|
@ -0,0 +1,24 @@
|
|||
# Run each offline-debugger test script, diff its output against the recorded
# expected output, and report PASSED/FAILED per test.  Refactored from three
# copy-pasted blocks into a single helper.
run_test() {
    local name="$1"
    python "${name}.py" > "${name}.actual"
    if diff "${name}.actual" "${name}.expected"; then
        echo "${name} PASSED"
    else
        echo "${name} FAILED"
    fi
}

run_test sync_trans_false_read_tensors
run_test sync_trans_true_read_tensors
run_test sync_trans_false_watchpoints
|
|
@ -0,0 +1,70 @@
|
|||
-----------------------------------------------------------
|
||||
tensor_info_1 attributes:
|
||||
node name = Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias
|
||||
slot = 0
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 0
|
||||
is_parameter = True
|
||||
|
||||
tensor_data_1 attributes:
|
||||
data (printed in uint8) = [ 0 0 0 0 195 127 0 0 176 202 195 248 194 127 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 8 58 196 248
|
||||
194 127 0 0 17 0 0 0 0 0 0 0 160 76 6 140 195 127
|
||||
0 0 69 0 0 0 0 0 0 0 1 0 0 0 195 127 0 0
|
||||
64 195 195 248 194 127 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 88 1 196 248 194 127 0 0 18 0 0 0
|
||||
0 0 0 0 160 47 6 140 195 127 0 0 69 0 0 0 0 0
|
||||
0 0 1 0 0 0 195 127 0 0 176 203 195 248 194 127 0 0
|
||||
176 204 195 248 194 127 0 0 0 0 0 0 0 0 0 0 216 241
|
||||
195 248 194 127 0 0 19 0 0 0 0 0 0 0 96 39 6 140
|
||||
195 127 0 0 69 0 0 0 0 0 0 0 1 0 0 0 195 127
|
||||
0 0 112 52 196 248 194 127 0 0 176 52 196 248 194 127 0 0
|
||||
0 0 0 0 0 0 0 0 88 250 195 248 194 127 0 0 20 0
|
||||
0 0 0 0 0 0 128 130 5 140 195 127 0 0 69 0 0 0
|
||||
0 0 0 0 0 0 0 0 195 127 0 0 208 136 195 248 194 127
|
||||
0 0 176 202 195 248 194 127 0 0 48 52 196 248 194 127 0 0
|
||||
184 247 195 248 194 127 0 0 21 0 0 0 0 0 0 0 176 213
|
||||
4 140 195 127 0 0 69 0 0 0 0 0 0 0 0 0 0 0
|
||||
195 127 0 0 48 52 196 248 194 127 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 8 249 195 248 194 127 0 0
|
||||
22 0 0 0 0 0 0 0 16 46 4 140 195 127 0 0 69 0
|
||||
0 0 0 0 0 0 1 0 0 0 195 127 0 0 64 137 195 248
|
||||
194 127 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0 0 88 12 196 248 194 127 0 0 23 0 0 0 0 0 0 0
|
||||
32 137 3 140 195 127 0 0 85 0 0 0 0 0 0 0 0 0
|
||||
0 0 195 127 0 0 176 202 195 248 194 127 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 104 246 195 248 194 127
|
||||
0 0 24 0 0 0 0 0 0 0 48 104 15 140 195 127 0 0
|
||||
32 104 15 140 195 127 0 0]
|
||||
size in bytes = 512
|
||||
debugger dtype = 11
|
||||
shape = [128]
|
||||
-----------------------------------------------------------
|
||||
tensor_info_2 attributes:
|
||||
node name = Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op308
|
||||
slot = 0
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 0
|
||||
is_parameter = False
|
||||
|
||||
tensor_data_2 attributes:
|
||||
data (printed in uint8) = [ 0 169 0 ... 152 242 63]
|
||||
size in bytes = 4153344
|
||||
debugger dtype = 11
|
||||
shape = [32, 192, 13, 13]
|
||||
-----------------------------------------------------------
|
||||
tensor_info_3 attributes:
|
||||
node name = Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op300
|
||||
slot = 1
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 0
|
||||
is_parameter = False
|
||||
|
||||
tensor_data_3 attributes:
|
||||
data (printed in uint8) = [ 0 169 0 ... 217 4 52]
|
||||
size in bytes = 831744
|
||||
debugger dtype = 8
|
||||
shape = [207936]
|
|
@ -0,0 +1,74 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""
|
||||
Read tensor test script for offline debugger APIs.
|
||||
"""
|
||||
|
||||
import mindspore.offline_debug.dbg_services as d
|
||||
import numpy as np
|
||||
|
||||
|
||||
def main():
    """Read three representative tensors from a sync-mode dump and print them.

    Covers a parameter tensor, an op output at slot 0, and an op output at a
    non-zero slot, so the expected-output diff exercises all three cases.
    """
    debugger_backend = d.DbgServices(
        dump_file_path="/home/jtzanaka/dumps/sync_trans_false/032421/alexnet")

    _ = debugger_backend.initialize(
        net_name="Network Name goes here!", is_sync_mode=True)

    # (node_name, slot, is_parameter) for each tensor to fetch; iteration,
    # device and graph are the same for all three.
    specs = [
        # parameter
        ("Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias", 0, True),
        # output tensor with zero slot
        ("Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op308", 0, False),
        # output tensor with non-zero slot
        ("Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op300", 1, False),
    ]
    tensor_info = [
        d.TensorInfo(node_name=name, slot=slot, iteration=2, device_id=0,
                     root_graph_id=0, is_parameter=is_param)
        for name, slot, is_param in specs
    ]

    tensor_data = debugger_backend.read_tensors(tensor_info)

    print_read_tensors(tensor_info, tensor_data)
|
||||
|
||||
|
||||
def print_read_tensors(tensor_info, tensor_data):
    """Print each (info, data) pair in the fixed layout the .expected files
    diff against; all print() arguments are kept identical to preserve the
    byte-for-byte output format."""
    for idx, info in enumerate(tensor_info):
        data = tensor_data[idx]
        label = str(idx + 1)
        print("-----------------------------------------------------------")
        print("tensor_info_" + label + " attributes:")
        print("node name = ", info.node_name)
        print("slot = ", info.slot)
        print("iteration = ", info.iteration)
        print("device_id = ", info.device_id)
        print("root_graph_id = ", info.root_graph_id)
        print("is_parameter = ", info.is_parameter)
        print()
        print("tensor_data_" + label + " attributes:")
        print("data (printed in uint8) = ", np.frombuffer(
            data.data_ptr, np.uint8, data.data_size))
        # Sanity check: the raw Python buffer length should agree with the
        # size reported by the C++ backend.
        py_byte_size = len(data.data_ptr)
        c_byte_size = data.data_size
        if c_byte_size != py_byte_size:
            print("The python byte size of ", py_byte_size,
                  " does not match the C++ byte size of ", c_byte_size)
        print("size in bytes = ", data.data_size)
        print("debugger dtype = ", data.dtype)
        print("shape = ", data.shape)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -0,0 +1,33 @@
|
|||
-----------------------------------------------------------
|
||||
watchpoint_hit for test_1 attributes:
|
||||
name = Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op308
|
||||
slot = 0
|
||||
condition = 6
|
||||
watchpoint_id = 1
|
||||
parameter 0 name = param
|
||||
parameter 0 disabled = False
|
||||
parameter 0 value = 0.0
|
||||
parameter 0 hit = True
|
||||
parameter 0 actual_value = -2.429065704345703
|
||||
error code = 0
|
||||
device_id = 0
|
||||
root_graph_id = 0
|
||||
-----------------------------------------------------------
|
||||
watchpoint_hit for test_4 attributes:
|
||||
name = Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/Parameter[6]_11/fc3.bias
|
||||
slot = 0
|
||||
condition = 18
|
||||
watchpoint_id = 3
|
||||
parameter 0 name = abs_mean_update_ratio_gt
|
||||
parameter 0 disabled = False
|
||||
parameter 0 value = 0.0
|
||||
parameter 0 hit = True
|
||||
parameter 0 actual_value = 1.793662034335766e-35
|
||||
parameter 1 name = epsilon
|
||||
parameter 1 disabled = True
|
||||
parameter 1 value = 0.0
|
||||
parameter 1 hit = False
|
||||
parameter 1 actual_value = 0.0
|
||||
error code = 0
|
||||
device_id = 0
|
||||
root_graph_id = 0
|
|
@ -0,0 +1,109 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""
|
||||
Watchpoints test script for offline debugger APIs.
|
||||
"""
|
||||
|
||||
import mindspore.offline_debug.dbg_services as d
|
||||
|
||||
|
||||
def main():
    """Exercise the offline-debugger watchpoint APIs end to end.

    Runs four scenarios against a sync-mode dump: (1) set + hit, (2) remove
    and verify no hit, (3) set a condition that should not trigger, and
    (4) a weight-change (two-parameter) watchpoint. Results are printed in
    the fixed format the .expected file diffs against.
    """

    debugger_backend = d.DbgServices(
        dump_file_path="/home/jtzanaka/dumps/sync_trans_false/032421/alexnet")

    _ = debugger_backend.initialize(
        net_name="Network Name goes here!", is_sync_mode=True)

    # NOTES:
    # -> watch_condition=6 is MIN_LT
    # -> watch_condition=18 is CHANGE_TOO_LARGE

    # test 1: watchpoint set and hit (watch_condition=6)
    param1 = d.Parameter(name="param", disabled=False, value=0.0)
    _ = debugger_backend.add_watchpoint(watchpoint_id=1, watch_condition=6,
                                        check_node_list={"Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/"
                                                         "Conv2D-op308":
                                                         {"device_id": [0], "root_graph_id": [0], "is_parameter": False
                                                          }}, parameter_list=[param1])

    # MIN_LT with threshold 0.0 is expected to fire exactly once at iteration 2.
    watchpoint_hits_test_1 = debugger_backend.check_watchpoints(iteration=2)
    if len(watchpoint_hits_test_1) != 1:
        print("ERROR -> test 1: watchpoint set but not hit just once")
    print_watchpoint_hits(watchpoint_hits_test_1, 1)

    # test 2: watchpoint remove and ensure it's not hit
    _ = debugger_backend.remove_watchpoint(watchpoint_id=1)
    watchpoint_hits_test_2 = debugger_backend.check_watchpoints(iteration=2)
    if watchpoint_hits_test_2:
        print("ERROR -> test 2: watchpoint removed but hit")

    # test 3: watchpoint set and not hit, then remove
    # value=-1000.0 makes MIN_LT unsatisfiable for this tensor.
    param2 = d.Parameter(name="param", disabled=False, value=-1000.0)
    _ = debugger_backend.add_watchpoint(watchpoint_id=2, watch_condition=6,
                                        check_node_list={"Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/"
                                                         "Conv2D-op308":
                                                         {"device_id": [0], "root_graph_id": [0], "is_parameter": False
                                                          }}, parameter_list=[param2])

    watchpoint_hits_test_3 = debugger_backend.check_watchpoints(iteration=2)
    if watchpoint_hits_test_3:
        print("ERROR -> test 3: watchpoint set but not supposed to be hit")
    _ = debugger_backend.remove_watchpoint(watchpoint_id=2)

    # test 4: weight change watchpoint set and hit
    # CHANGE_TOO_LARGE takes two parameters; epsilon is registered but disabled.
    param_abs_mean_update_ratio_gt = d.Parameter(
        name="abs_mean_update_ratio_gt", disabled=False, value=0.0)
    param_epsilon = d.Parameter(name="epsilon", disabled=True, value=0.0)
    _ = debugger_backend.add_watchpoint(watchpoint_id=3, watch_condition=18,
                                        check_node_list={"Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/"
                                                         "Parameter[6]_11/fc3.bias":
                                                         {"device_id": [0], "root_graph_id": [0], "is_parameter": True
                                                          }}, parameter_list=[param_abs_mean_update_ratio_gt,
                                                                              param_epsilon])

    # Weight-change conditions compare against the previous iteration, hence
    # iteration=3 here rather than 2.
    watchpoint_hits_test_4 = debugger_backend.check_watchpoints(iteration=3)
    if len(watchpoint_hits_test_4) != 1:
        print("ERROR -> test 4: watchpoint weight change set but not hit just once")
    print_watchpoint_hits(watchpoint_hits_test_4, 4)
|
||||
|
||||
|
||||
def print_watchpoint_hits(watchpoint_hits, test_id):
    """Print each watchpoint hit (and its parameters) in the fixed layout the
    .expected files diff against; print() arguments are unchanged so the
    output stays byte-identical."""
    for hit in watchpoint_hits:
        print("-----------------------------------------------------------")
        print("watchpoint_hit for test_%u attributes:" % test_id)
        print("name = ", hit.name)
        print("slot = ", hit.slot)
        print("condition = ", hit.condition)
        print("watchpoint_id = ", hit.watchpoint_id)
        for p_idx, param in enumerate(hit.parameters):
            print("parameter ", p_idx, " name = ",
                  param.name)
            print("parameter ", p_idx, " disabled = ",
                  param.disabled)
            print("parameter ", p_idx, " value = ",
                  param.value)
            print("parameter ", p_idx, " hit = ",
                  param.hit)
            print("parameter ", p_idx, " actual_value = ",
                  param.actual_value)
        print("error code = ", hit.error_code)
        print("device_id = ", hit.device_id)
        print("root_graph_id = ", hit.root_graph_id)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -0,0 +1,70 @@
|
|||
-----------------------------------------------------------
|
||||
tensor_info_1 attributes:
|
||||
node name = Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias
|
||||
slot = 0
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 0
|
||||
is_parameter = True
|
||||
|
||||
tensor_data_1 attributes:
|
||||
data (printed in uint8) = [ 1 0 0 0 195 127 0 0 80 58 118 65 195 127 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 40 186 117 65
|
||||
195 127 0 0 5 0 0 0 0 0 0 0 160 76 6 204 195 127
|
||||
0 0 69 0 0 0 0 0 0 0 1 0 0 0 195 127 0 0
|
||||
48 135 117 65 195 127 0 0 16 58 118 65 195 127 0 0 144 58
|
||||
118 65 195 127 0 0 168 186 117 65 195 127 0 0 6 0 0 0
|
||||
0 0 0 0 160 47 6 204 195 127 0 0 69 0 0 0 0 0
|
||||
0 0 1 0 0 0 195 127 0 0 80 58 118 65 195 127 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 184 249
|
||||
117 65 195 127 0 0 7 0 0 0 0 0 0 0 96 39 6 204
|
||||
195 127 0 0 69 0 0 0 0 0 0 0 1 0 0 0 195 127
|
||||
0 0 224 218 117 65 195 127 0 0 0 0 0 0 0 0 0 0
|
||||
224 219 117 65 195 127 0 0 200 17 118 65 195 127 0 0 8 0
|
||||
0 0 0 0 0 0 128 130 5 204 195 127 0 0 69 0 0 0
|
||||
0 0 0 0 1 0 0 0 195 127 0 0 120 233 255 59 196 127
|
||||
0 0 224 217 117 65 195 127 0 0 224 214 117 65 195 127 0 0
|
||||
120 250 117 65 195 127 0 0 9 0 0 0 0 0 0 0 176 213
|
||||
4 204 195 127 0 0 69 0 0 0 0 0 0 0 1 0 0 0
|
||||
195 127 0 0 240 66 118 65 195 127 0 0 160 218 117 65 195 127
|
||||
0 0 224 215 117 65 195 127 0 0 40 9 118 65 195 127 0 0
|
||||
10 0 0 0 0 0 0 0 16 46 4 204 195 127 0 0 69 0
|
||||
0 0 0 0 0 0 1 0 0 0 195 127 0 0 208 59 118 65
|
||||
195 127 0 0 0 0 0 0 0 0 0 0 96 218 117 65 195 127
|
||||
0 0 56 251 117 65 195 127 0 0 11 0 0 0 0 0 0 0
|
||||
32 137 3 204 195 127 0 0 85 0 0 0 0 0 0 0 1 0
|
||||
0 0 195 127 0 0 224 214 117 65 195 127 0 0 144 59 118 65
|
||||
195 127 0 0 160 214 117 65 195 127 0 0 136 62 118 65 195 127
|
||||
0 0 12 0 0 0 0 0 0 0 48 104 15 204 195 127 0 0
|
||||
32 104 15 204 195 127 0 0]
|
||||
size in bytes = 512
|
||||
debugger dtype = 11
|
||||
shape = [128]
|
||||
-----------------------------------------------------------
|
||||
tensor_info_2 attributes:
|
||||
node name = Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op308
|
||||
slot = 0
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 0
|
||||
is_parameter = False
|
||||
|
||||
tensor_data_2 attributes:
|
||||
data (printed in uint8) = [206 239 74 ... 53 201 62]
|
||||
size in bytes = 4153344
|
||||
debugger dtype = 11
|
||||
shape = [32, 192, 13, 13]
|
||||
-----------------------------------------------------------
|
||||
tensor_info_3 attributes:
|
||||
node name = Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op300
|
||||
slot = 1
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 0
|
||||
is_parameter = False
|
||||
|
||||
tensor_data_3 attributes:
|
||||
data (printed in uint8) = [206 239 74 ... 16 239 51]
|
||||
size in bytes = 831744
|
||||
debugger dtype = 8
|
||||
shape = [207936]
|
|
@ -0,0 +1,74 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""
|
||||
Read tensor test script for offline debugger APIs.
|
||||
"""
|
||||
|
||||
import mindspore.offline_debug.dbg_services as d
|
||||
import numpy as np
|
||||
|
||||
|
||||
def main():
    """Read three representative tensors from a sync-mode (trans_flag=true)
    dump and print them.

    Covers a parameter tensor, an op output at slot 0, and an op output at a
    non-zero slot, so the expected-output diff exercises all three cases.
    """
    debugger_backend = d.DbgServices(
        dump_file_path="/home/jtzanaka/dumps/sync_trans_true/032421/alexnet")

    _ = debugger_backend.initialize(
        net_name="Network Name goes here!", is_sync_mode=True)

    # (node_name, slot, is_parameter) for each tensor to fetch; iteration,
    # device and graph are the same for all three.
    specs = [
        # parameter
        ("Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias", 0, True),
        # output tensor with zero slot
        ("Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op308", 0, False),
        # output tensor with non-zero slot
        ("Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op300", 1, False),
    ]
    tensor_info = [
        d.TensorInfo(node_name=name, slot=slot, iteration=2, device_id=0,
                     root_graph_id=0, is_parameter=is_param)
        for name, slot, is_param in specs
    ]

    tensor_data = debugger_backend.read_tensors(tensor_info)

    print_read_tensors(tensor_info, tensor_data)
|
||||
|
||||
|
||||
def print_read_tensors(tensor_info, tensor_data):
    """Print each (info, data) pair in the fixed layout the .expected files
    diff against; all print() arguments are kept identical to preserve the
    byte-for-byte output format."""
    for idx, info in enumerate(tensor_info):
        data = tensor_data[idx]
        label = str(idx + 1)
        print("-----------------------------------------------------------")
        print("tensor_info_" + label + " attributes:")
        print("node name = ", info.node_name)
        print("slot = ", info.slot)
        print("iteration = ", info.iteration)
        print("device_id = ", info.device_id)
        print("root_graph_id = ", info.root_graph_id)
        print("is_parameter = ", info.is_parameter)
        print()
        print("tensor_data_" + label + " attributes:")
        print("data (printed in uint8) = ", np.frombuffer(
            data.data_ptr, np.uint8, data.data_size))
        # Sanity check: the raw Python buffer length should agree with the
        # size reported by the C++ backend.
        py_byte_size = len(data.data_ptr)
        c_byte_size = data.data_size
        if c_byte_size != py_byte_size:
            print("The python byte size of ", py_byte_size,
                  " does not match the C++ byte size of ", c_byte_size)
        print("size in bytes = ", data.data_size)
        print("debugger dtype = ", data.dtype)
        print("shape = ", data.shape)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -0,0 +1,66 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "pybind11/pybind11.h"
|
||||
#include "pybind11/stl.h"
|
||||
#include "pybind11/stl_bind.h"
|
||||
#include "debugger/offline_debug/dbg_services.h"
|
||||
|
||||
// Pybind11 registration for the offline-debugger backend module
// `_mindspore_offline_debug`, consumed by the Python wrappers in
// mindspore/offline_debug/dbg_services.py. The lower-case class names
// (parameter, watchpoint_hit, tensor_info, tensor_data) are the raw C++
// structs; the Python layer wraps them in Parameter/TensorInfo/etc.
PYBIND11_MODULE(_mindspore_offline_debug, m) {
  m.doc() = "pybind11 debug services api";
  // Main service object. Ctor takes a single bool (verbose flag per the
  // Python check_init wrapper — TODO confirm against dbg_services.h).
  py::class_<DbgServices>(m, "DbgServices")
    .def(py::init<bool>())
    .def("Initialize", &DbgServices::Initialize)
    .def("AddWatchpoint", &DbgServices::AddWatchpoint)
    .def("RemoveWatchpoint", &DbgServices::RemoveWatchpoint)
    .def("CheckWatchpoints", &DbgServices::CheckWatchpoints)
    .def("ReadTensors", &DbgServices::ReadTensors)
    .def("GetVersion", &DbgServices::GetVersion);

  // Watchpoint parameter: (name, disabled, value, hit, actual_value).
  py::class_<parameter>(m, "parameter")
    .def(py::init<std::string, bool, double, bool, double>())
    .def("get_name", &parameter::get_name)
    .def("get_disabled", &parameter::get_disabled)
    .def("get_value", &parameter::get_value)
    .def("get_hit", &parameter::get_hit)
    .def("get_actual_value", &parameter::get_actual_value);

  // One triggered watchpoint: (name, slot, condition, watchpoint_id,
  // parameters, error_code, device_id, root_graph_id).
  py::class_<watchpoint_hit>(m, "watchpoint_hit")
    .def(py::init<std::string, uint32_t, int, uint32_t, std::vector<parameter_t>, int32_t, uint32_t, uint32_t>())
    .def("get_name", &watchpoint_hit::get_name)
    .def("get_slot", &watchpoint_hit::get_slot)
    .def("get_condition", &watchpoint_hit::get_condition)
    .def("get_watchpoint_id", &watchpoint_hit::get_watchpoint_id)
    .def("get_parameters", &watchpoint_hit::get_parameters)
    .def("get_error_code", &watchpoint_hit::get_error_code)
    .def("get_device_id", &watchpoint_hit::get_device_id)
    .def("get_root_graph_id", &watchpoint_hit::get_root_graph_id);

  // Identifies a tensor in the dump: (node_name, slot, iteration, device_id,
  // root_graph_id, is_parameter).
  py::class_<tensor_info>(m, "tensor_info")
    .def(py::init<std::string, uint32_t, uint32_t, uint32_t, uint32_t, bool>())
    .def("get_node_name", &tensor_info::get_node_name)
    .def("get_slot", &tensor_info::get_slot)
    .def("get_iteration", &tensor_info::get_iteration)
    .def("get_device_id", &tensor_info::get_device_id)
    .def("get_root_graph_id", &tensor_info::get_root_graph_id)
    .def("get_is_parameter", &tensor_info::get_is_parameter);

  // Raw tensor payload: (data_ptr, data_size, dtype, shape). data_ptr is a
  // char* into backend-owned memory — its lifetime is managed on the C++
  // side (NOTE(review): confirm the buffer outlives the Python object).
  py::class_<tensor_data>(m, "tensor_data")
    .def(py::init<char *, uint64_t, int, std::vector<int64_t>>())
    .def("get_data_ptr", &tensor_data::get_data_ptr)
    .def("get_data_size", &tensor_data::get_data_size)
    .def("get_dtype", &tensor_data::get_dtype)
    .def("get_shape", &tensor_data::get_shape);
}
|
|
@ -0,0 +1,123 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""
|
||||
General Validator Helper Functions.
|
||||
"""
|
||||
import os
|
||||
import inspect
|
||||
|
||||
UINT32_MAX = 4294967295
|
||||
UINT32_MIN = 0
|
||||
UINT64_MAX = 18446744073709551615
|
||||
UINT64_MIN = 0
|
||||
|
||||
|
||||
def pad_arg_name(arg_name):
    """Return ``arg_name`` with a trailing space appended when non-empty, so
    it splices cleanly into error-message templates like
    'Input {name}is not ...'."""
    return arg_name + " " if arg_name != "" else arg_name
|
||||
|
||||
|
||||
def check_value(arg, valid_range, arg_name=""):
    """Raise ValueError when ``arg`` lies outside the inclusive interval
    ``[valid_range[0], valid_range[1]]``; ``arg_name``, when given, prefixes
    the error message."""
    arg_name = pad_arg_name(arg_name)
    low, high = valid_range[0], valid_range[1]
    if not low <= arg <= high:
        raise ValueError(
            "Input {0}is not within the required interval of ({1} to {2}).".format(arg_name,
                                                                                   low, high))
|
||||
|
||||
|
||||
def check_uint32(arg, arg_name=""):
    """Validate that ``arg`` is an int inside the uint32 range.

    Fix: forward ``arg_name`` to check_value — previously it was accepted and
    used only by type_check, so out-of-range errors printed a blank label
    ('Input is not within ...') instead of naming the argument.
    """
    type_check(arg, (int,), arg_name)
    check_value(arg, [UINT32_MIN, UINT32_MAX], arg_name)
|
||||
|
||||
|
||||
def check_uint64(arg, arg_name=""):
    """Validate that ``arg`` is an int inside the uint64 range.

    Fix: forward ``arg_name`` to check_value — previously it was accepted and
    used only by type_check, so out-of-range errors printed a blank label
    ('Input is not within ...') instead of naming the argument.
    """
    type_check(arg, (int,), arg_name)
    check_value(arg, [UINT64_MIN, UINT64_MAX], arg_name)
|
||||
|
||||
|
||||
def check_dir(dataset_dir):
    """Raise ValueError unless ``dataset_dir`` is an existing, readable
    directory."""
    is_readable_dir = os.path.isdir(dataset_dir) and os.access(dataset_dir, os.R_OK)
    if not is_readable_dir:
        raise ValueError("The folder {} does not exist or permission denied!".format(dataset_dir))
|
||||
|
||||
|
||||
def parse_user_args(method, *args, **kwargs):
    """
    Bind user-supplied arguments against ``method``'s signature.

    Args:
        method (method): a callable function.
        args: user passed args.
        kwargs: user passed kwargs.

    Returns:
        list: values the user (or defaults) supplied for each parameter,
            excluding the implicit ``self``/``cls`` receiver.
        OrderedDict: the fully bound parameter -> argument mapping.
    """
    sig = inspect.signature(method)
    # An unbound method's signature still lists the receiver; bind the method
    # object itself as a placeholder for it and drop it from the result.
    has_receiver = 'self' in sig.parameters or 'cls' in sig.parameters
    bind_args = (method,) + args if has_receiver else args
    ba = sig.bind(*bind_args, **kwargs)
    ba.apply_defaults()
    params = list(sig.parameters)[1:] if has_receiver else list(sig.parameters)

    user_filled_args = [ba.arguments.get(name) for name in params]
    return user_filled_args, ba.arguments
|
||||
|
||||
|
||||
def type_check(arg, types, arg_name):
    """
    Raise TypeError unless ``arg`` is an instance of one of ``types``.

    Args:
        arg (Any): any variable.
        types (tuple): tuple of all valid types for arg.
        arg_name (str): the name of arg.

    Returns:
        Exception: when the type is not correct, otherwise nothing.
    """
    print_value = '\"\"' if repr(arg) == repr('') else arg
    # bool is a subclass of int: when int is accepted but bool is not,
    # explicitly reject bool values.
    bool_rejected = int in types and bool not in types and isinstance(arg, bool)
    if bool_rejected or not isinstance(arg, types):
        raise TypeError("Argument {0} with value {1} is not of type {2}.".format(arg_name, print_value, types))
|
||||
|
||||
|
||||
def type_check_list(args, types, arg_names):
    """
    Type-check every element of a list/tuple.

    Args:
        args (Union[list, tuple]): a list or tuple of any variable.
        types (tuple): tuple of all valid types for arg.
        arg_names (Union[list, tuple of str]): the names of args, or a single
            base name expanded to ``name[i]`` per element.

    Returns:
        Exception: when the type is not correct, otherwise nothing.
    """
    type_check(args, (list, tuple,), arg_names)
    if isinstance(arg_names, str):
        # Single base name: derive an indexed name per element.
        arg_names = ["{0}[{1}]".format(arg_names, i) for i in range(len(args))]
    elif len(args) != len(arg_names):
        raise ValueError("List of arguments is not the same length as argument_names.")
    for arg, arg_name in zip(args, arg_names):
        type_check(arg, types, arg_name)
|
|
@ -0,0 +1,223 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""
|
||||
Validator Functions for Offline Debugger APIs.
|
||||
"""
|
||||
from functools import wraps
|
||||
|
||||
import dbg_services as cds
|
||||
from mi_validator_helpers import parse_user_args, type_check, type_check_list, check_dir, check_uint32, check_uint64
|
||||
|
||||
|
||||
def check_init(method):
    """Decorator validating DbgServices.__init__ arguments: dump_file_path
    must be a readable directory path string and verbose a bool."""

    @wraps(method)
    def validated(self, *args, **kwargs):
        [dump_file_path, verbose], _ = parse_user_args(method, *args, **kwargs)
        type_check(dump_file_path, (str,), "dump_file_path")
        type_check(verbose, (bool,), "verbose")
        check_dir(dump_file_path)
        return method(self, *args, **kwargs)

    return validated
|
||||
|
||||
|
||||
def check_initialize(method):
    """Decorator validating DbgServices.initialize arguments: net_name must
    be a str and is_sync_mode a bool."""

    @wraps(method)
    def validated(self, *args, **kwargs):
        [net_name, is_sync_mode], _ = parse_user_args(method, *args, **kwargs)
        type_check(net_name, (str,), "net_name")
        type_check(is_sync_mode, (bool,), "is_sync_mode")
        return method(self, *args, **kwargs)

    return validated
|
||||
|
||||
|
||||
def check_add_watchpoint(method):
    """Wrapper method to check the parameters of DbgServices AddWatchpoint.

    Validates: watchpoint id and condition are uint32; check_node_list maps
    node-name strings to per-node dicts whose only legal keys are
    "device_id" / "root_graph_id" (lists of uint32) and "is_parameter"
    (bool); parameter_list holds only Parameter instances.
    """

    @wraps(method)
    def new_method(self, *args, **kwargs):
        [id_value, watch_condition, check_node_list, parameter_list], _ = parse_user_args(method, *args, **kwargs)

        check_uint32(id_value, "id")
        check_uint32(watch_condition, "watch_condition")
        type_check(check_node_list, (dict,), "check_node_list")
        for node_name, node_info in check_node_list.items():
            type_check(node_name, (str,), "node_name")
            type_check(node_info, (dict,), "node_info")
            for info_name, info_param in node_info.items():
                type_check(info_name, (str,), "node parameter name")
                # device_id / root_graph_id carry lists of ids; each element
                # must individually be a valid uint32.
                if info_name in ["device_id"]:
                    for param in info_param:
                        check_uint32(param, "device_id")
                elif info_name in ["root_graph_id"]:
                    for param in info_param:
                        check_uint32(param, "root_graph_id")
                elif info_name in ["is_parameter"]:
                    type_check(info_param, (bool,), "is_parameter")
                else:
                    # Any other key in the node dict is rejected outright.
                    raise ValueError("Node parameter {} is not defined.".format(info_name))
        # Each entry in parameter_list must be a dbg_services.Parameter.
        param_names = ["param_{0}".format(i) for i in range(len(parameter_list))]
        type_check_list(parameter_list, (cds.Parameter,), param_names)

        return method(self, *args, **kwargs)

    return new_method
|
||||
|
||||
|
||||
def check_remove_watchpoint(method):
    """Decorator validating DbgServices.remove_watchpoint arguments: the
    watchpoint id must be a uint32."""

    @wraps(method)
    def validated(self, *args, **kwargs):
        [id_value], _ = parse_user_args(method, *args, **kwargs)
        check_uint32(id_value, "id")
        return method(self, *args, **kwargs)

    return validated
|
||||
|
||||
|
||||
def check_check_watchpoints(method):
    """Decorator validating DbgServices.check_watchpoints arguments: the
    iteration number must be a uint32."""

    @wraps(method)
    def validated(self, *args, **kwargs):
        [iteration], _ = parse_user_args(method, *args, **kwargs)
        check_uint32(iteration, "iteration")
        return method(self, *args, **kwargs)

    return validated
|
||||
|
||||
|
||||
def check_read_tensors(method):
    """Decorator validating DbgServices.read_tensors arguments: every entry
    of info_list must be a TensorInfo instance."""

    @wraps(method)
    def validated(self, *args, **kwargs):
        [info_list], _ = parse_user_args(method, *args, **kwargs)
        info_names = ["info_{0}".format(i) for i in range(len(info_list))]
        type_check_list(info_list, (cds.TensorInfo,), info_names)
        return method(self, *args, **kwargs)

    return validated
|
||||
|
||||
|
||||
def check_initialize_done(method):
    """Wrapper method to check that initialize() was called before any other DbgServices method."""

    @wraps(method)
    def new_method(self, *args, **kwargs):
        # DbgServices.initialize() sets self.initialized = True; every other
        # API call is invalid until that has happened.
        if not self.initialized:
            # Fixed typo in the original message ("Inilize" -> "Initialize").
            raise RuntimeError("Initialize should be called before any other methods of DbgServices!")
        return method(self, *args, **kwargs)

    return new_method
|
||||
|
||||
|
||||
def check_tensor_info_init(method):
    """Validate the constructor arguments of DbgServices TensorInfo."""

    @wraps(method)
    def new_method(self, *args, **kwargs):
        arg_values, _ = parse_user_args(method, *args, **kwargs)
        node_name, slot, iteration, device_id, root_graph_id, is_parameter = arg_values

        type_check(node_name, (str,), "node_name")
        # Numeric identifiers must all fit in an unsigned 32-bit integer;
        # checked in declaration order so error reporting matches the API docs.
        for value, label in ((slot, "slot"), (iteration, "iteration"),
                             (device_id, "device_id"), (root_graph_id, "root_graph_id")):
            check_uint32(value, label)
        type_check(is_parameter, (bool,), "is_parameter")

        return method(self, *args, **kwargs)

    return new_method
|
||||
|
||||
|
||||
def check_tensor_data_init(method):
    """Validate the constructor arguments of DbgServices TensorData."""

    @wraps(method)
    def new_method(self, *args, **kwargs):
        (data_ptr, data_size, dtype, shape), _ = parse_user_args(method, *args, **kwargs)

        type_check(data_ptr, (bytes,), "data_ptr")
        check_uint64(data_size, "data_size")
        type_check(dtype, (int,), "dtype")
        # Each shape dimension must be an int; labels pinpoint the bad one.
        dim_labels = ["shape_{}".format(idx) for idx, _ in enumerate(shape)]
        type_check_list(shape, (int,), dim_labels)

        # The raw byte buffer must match the declared byte size exactly.
        if len(data_ptr) != data_size:
            raise ValueError("data_ptr length ({0}) is not equal to data_size ({1}).".format(len(data_ptr), data_size))

        return method(self, *args, **kwargs)

    return new_method
|
||||
|
||||
|
||||
def check_watchpoint_hit_init(method):
    """Validate the constructor arguments of DbgServices WatchpointHit."""

    @wraps(method)
    def new_method(self, *args, **kwargs):
        arg_values, _ = parse_user_args(method, *args, **kwargs)
        (name, slot, condition, watchpoint_id,
         parameters, error_code, device_id, root_graph_id) = arg_values

        type_check(name, (str,), "name")
        check_uint32(slot, "slot")
        type_check(condition, (int,), "condition")
        check_uint32(watchpoint_id, "watchpoint_id")
        # Each watchpoint parameter must be a cds.Parameter instance.
        labels = ["param_{}".format(idx) for idx, _ in enumerate(parameters)]
        type_check_list(parameters, (cds.Parameter,), labels)
        type_check(error_code, (int,), "error_code")
        check_uint32(device_id, "device_id")
        check_uint32(root_graph_id, "root_graph_id")

        return method(self, *args, **kwargs)

    return new_method
|
||||
|
||||
|
||||
def check_parameter_init(method):
    """Validate the constructor arguments of DbgServices Parameter."""

    @wraps(method)
    def new_method(self, *args, **kwargs):
        (name, disabled, value, hit, actual_value), _ = parse_user_args(method, *args, **kwargs)

        # Checked in declaration order so the first invalid argument is the
        # one reported, matching the original behaviour.
        for arg, expected, label in ((name, (str,), "name"),
                                     (disabled, (bool,), "disabled"),
                                     (value, (float,), "value"),
                                     (hit, (bool,), "hit"),
                                     (actual_value, (float,), "actual_value")):
            type_check(arg, expected, label)

        return method(self, *args, **kwargs)

    return new_method
|
|
@ -0,0 +1,19 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "debugger/offline_debug/offline_logger.h"

// Logging is opt-in: the offline DbgServices front end flips this flag when
// it is constructed with verbose=true.
bool DbgLogger::verbose = false;
|
|
@ -0,0 +1,59 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef OFFLINE_LOGGER_H_
#define OFFLINE_LOGGER_H_

#include <cstdio>   // std::freopen
#include <cstdlib>  // std::getenv
#include <iostream>

// MS_LOG(level) mirrors the online debugger's logging macro: it expands to a
// DbgLogger temporary that swallows the streamed expression entirely unless
// DbgLogger::verbose is enabled.
#define MS_LOG(level) MS_LOG_##level

#define MS_LOG_INFO static_cast<void>(0), !(DbgLogger::verbose) ? void(0) : DbgLogger(DbgLoggerLvl::INFO) < std::cout

// In offline mode every non-exception level shares the INFO behaviour.
#define MS_LOG_ERROR MS_LOG_INFO

#define MS_LOG_DEBUG MS_LOG_INFO

#define MS_LOG_WARNING MS_LOG_INFO

#define MS_LOG_EXCEPTION \
  static_cast<void>(0), !(DbgLogger::verbose) ? void(0) : DbgLogger(DbgLoggerLvl::EXCEPTION) < std::cout

enum DbgLoggerLvl : int { DEBUG = 0, INFO, WARNING, ERROR, EXCEPTION };

// Minimal logger used by the offline debugger. Output goes to stdout, which
// is re-pointed (append mode) at the file named by the OFFLINE_DBG_LOG
// environment variable when that variable is set.
class DbgLogger {
 public:
  explicit DbgLogger(DbgLoggerLvl lvl) : lvl_(lvl) {}
  ~DbgLogger() = default;
  // Terminates a streamed log statement: optionally redirects stdout, flushes
  // a newline, and for EXCEPTION level rethrows the in-flight exception
  // (NOTE(review): a bare `throw;` with no active exception terminates the
  // process — confirm callers only hit this inside a catch or intend abort).
  void operator<(std::ostream &os) const {
    char *dbg_log_path = std::getenv("OFFLINE_DBG_LOG");
    if (dbg_log_path != nullptr) {
      FILE *fp = std::freopen(dbg_log_path, "a", stdout);
      if (fp == nullptr) {
        std::cout << "ERROR: DbgLogger could not redirect all stdout to a file";
      }
    }
    os << std::endl;
    if (lvl_ == DbgLoggerLvl::EXCEPTION) {
      throw;
    }
  }
  static bool verbose;  // defined in offline_logger.cc; gates all MS_LOG output

 private:
  DbgLoggerLvl lvl_;
};
#endif  // OFFLINE_LOGGER_H_
|
|
@ -22,7 +22,16 @@
|
|||
#include <tuple>
|
||||
#include "debug/debugger/tensor_summary.h"
|
||||
|
||||
#ifdef OFFLINE_DBG_MODE
|
||||
#include "Eigen/Core"
|
||||
#include "Eigen/src/Core/arch/CUDA/Half.h"
|
||||
using float16 = Eigen::half;
|
||||
#include "offline_debug/offline_logger.h"
|
||||
#endif
|
||||
|
||||
#ifdef ONLINE_DBG_MODE
|
||||
namespace mindspore {
|
||||
#endif
|
||||
using CONDITION_TYPE = DebugServices::CONDITION_TYPE;
|
||||
|
||||
RangeCountCalculator::RangeCountCalculator()
|
||||
|
@ -281,4 +290,6 @@ template class TensorSummary<float16>;
|
|||
template class TensorSummary<float>;
|
||||
template class TensorSummary<double>;
|
||||
template class TensorSummary<bool>;
|
||||
#ifdef ONLINE_DBG_MODE
|
||||
} // namespace mindspore
|
||||
#endif
|
||||
|
|
|
@ -24,7 +24,9 @@
|
|||
|
||||
#include "debug/debug_services.h"
|
||||
|
||||
#ifdef ONLINE_DBG_MODE
|
||||
namespace mindspore {
|
||||
#endif
|
||||
class RangeCountCalculator {
|
||||
public:
|
||||
RangeCountCalculator();
|
||||
|
@ -121,5 +123,7 @@ class TensorSummary : public ITensorSummary {
|
|||
double_t GetZeroValPercent();
|
||||
void InitCalculators(const std::vector<DebugServices::watchpoint_t> &);
|
||||
};
|
||||
#ifdef ONLINE_DBG_MODE
|
||||
} // namespace mindspore
|
||||
#endif
|
||||
#endif // MINDSPORE_TENSOR_SUMMARY_H
|
||||
|
|
|
@ -16,37 +16,170 @@
|
|||
#ifndef MINDSPORE_CCSRC_DEBUG_TENSOR_DATA_H_
|
||||
#define MINDSPORE_CCSRC_DEBUG_TENSOR_DATA_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#ifdef OFFLINE_DBG_MODE
|
||||
#include "debugger/offline_debug/offline_logger.h"
|
||||
#else
|
||||
#include "ir/tensor.h"
|
||||
#include "mindspore/core/utils/log_adapter.h"
|
||||
#endif
|
||||
|
||||
#ifdef ONLINE_DBG_MODE
|
||||
namespace mindspore {
|
||||
class TensorData {
|
||||
private:
|
||||
mindspore::tensor::TensorPtr tensor_ptr;
|
||||
std::string name;
|
||||
size_t slot;
|
||||
int execution_order;
|
||||
#endif
|
||||
|
||||
namespace MsTypeId {
// Local mirror of MindSpore's TypeId numbering so the offline debugger can
// decode dumped type ids without pulling in the full ir/dtype headers.
// Values are implicit and sequential; the *Begin/*End aliases delimit the
// meta, object and number ranges exactly as in the original enumeration.
typedef enum MsTypeId : unsigned int {
  kTypeUnknown = 0,
  kMetaTypeBegin = kTypeUnknown,
  kMetaTypeType,  // Type
  kMetaTypeAnything,
  kMetaTypeObject,
  kMetaTypeTypeType,  // TypeType
  kMetaTypeProblem,
  kMetaTypeExternal,
  kMetaTypeNone,
  kMetaTypeNull,
  kMetaTypeEllipsis,
  kMetaTypeEnd,
  // Object types start where the meta range ends.
  kObjectTypeBegin = kMetaTypeEnd,
  kObjectTypeNumber,
  kObjectTypeString,
  kObjectTypeList,
  kObjectTypeTuple,
  kObjectTypeSlice,
  kObjectTypeKeyword,
  kObjectTypeTensorType,
  kObjectTypeRowTensorType,
  kObjectTypeSparseTensorType,
  kObjectTypeUndeterminedType,
  kObjectTypeClass,
  kObjectTypeDictionary,
  kObjectTypeFunction,
  kObjectTypeJTagged,
  kObjectTypeSymbolicKeyType,
  kObjectTypeEnvType,
  kObjectTypeRefKey,
  kObjectTypeRef,
  kObjectTypeEnd,
  // Number types start where the object range ends.
  kNumberTypeBegin = kObjectTypeEnd,
  kNumberTypeBool,
  kNumberTypeInt,
  kNumberTypeInt8,
  kNumberTypeInt16,
  kNumberTypeInt32,
  kNumberTypeInt64,
  kNumberTypeUInt,
  kNumberTypeUInt8,
  kNumberTypeUInt16,
  kNumberTypeUInt32,
  kNumberTypeUInt64,
  kNumberTypeFloat,
  kNumberTypeFloat16,
  kNumberTypeFloat32,
  kNumberTypeFloat64,
  kNumberTypeComplex64,
  kNumberTypeEnd
} MsTypeId;
}  // namespace MsTypeId
|
||||
|
||||
// Debugger-internal tensor element type. Values are explicit and must stay
// stable: they are part of the dump-file decoding contract shared with the
// online debugger.
typedef enum DbgDataType : unsigned int {
  DT_UNDEFINED = 0,
  // Scalar types.
  DT_BOOL = 1,      // bool
  DT_INT8 = 2,      // int8_t
  DT_INT16 = 3,     // int16_t
  DT_INT32 = 4,     // int32_t
  DT_INT64 = 5,     // int64_t
  DT_UINT8 = 6,     // uint8_t
  DT_UINT16 = 7,    // uint16_t
  DT_UINT32 = 8,    // uint32_t
  DT_UINT64 = 9,    // uint64_t
  DT_FLOAT16 = 10,  // float 16
  DT_FLOAT32 = 11,  // float 32
  DT_FLOAT64 = 12,  // float 64
  DT_STRING = 13,   // string
  DT_TENSOR = 14,   // tensor
  DT_GRAPH = 15,    // graph
  // List counterparts of the scalar types above.
  DT_BOOLS = 16,     // list of bool
  DT_INTS8 = 17,     // list of int8_t
  DT_INTS16 = 18,    // list of int16_t
  DT_INTS32 = 19,    // list of int32_t
  DT_INTS64 = 20,    // list of int64_t
  DT_UINTS8 = 21,    // list of uint8_t
  DT_UINTS16 = 22,   // list of uint16_t
  DT_UINTS32 = 23,   // list of uint32_t
  DT_UINTS64 = 24,   // list of uint64_t
  DT_FLOATS16 = 25,  // list of float16
  DT_FLOATS32 = 26,  // list of float32
  DT_FLOATS64 = 27,  // list of float64
  DT_STRINGS = 28,   // list of string
  DT_TENSORS = 29,   // list of tensor
  DT_GRAPHS = 30,    // list of graph
  // Container types.
  DT_TUPLE = 31,  // tuple
  DT_LIST = 32,   // list
  DT_DICT = 33,   // dictionary
  // Other types.
  DT_NONE = 34,      // None
  DT_SYM_INST = 35,  // Symbolic Key Instance
  // Type-related types.
  DT_BASE_INT = 36,    // type generic int
  DT_BASE_UINT = 37,   // type generate unsigned int
  DT_BASE_FLOAT = 38,  // type generate float
  DT_TYPE = 39,        // type type
  DT_ANYTHING = 40,    // type anything
  DT_REFKEY = 41,      // type refkey
  DT_REF = 42          // type ref
} DbgDataType;
|
||||
|
||||
class TensorData {
|
||||
public:
|
||||
TensorData() : slot(0), execution_order(-1) {}
|
||||
|
||||
TensorData(const TensorData &obj) {
|
||||
std::cout << "Copy Constructor" << std::endl;
|
||||
MS_LOG(INFO) << "Copy Constructor";
|
||||
this->name = obj.name;
|
||||
this->execution_order = obj.execution_order;
|
||||
this->slot = obj.slot;
|
||||
this->data_ptr = obj.data_ptr;
|
||||
this->size = obj.size;
|
||||
this->data_type = obj.data_type;
|
||||
this->data_type_size = obj.data_type_size;
|
||||
this->shape = obj.shape;
|
||||
this->iteration = obj.iteration;
|
||||
this->device_id = obj.device_id;
|
||||
#ifdef ONLINE_DBG_MODE
|
||||
this->tensor_ptr = obj.tensor_ptr;
|
||||
#endif
|
||||
}
|
||||
|
||||
~TensorData() {}
|
||||
|
||||
std::string GetName() { return this->name; }
|
||||
|
||||
mindspore::tensor::TensorPtr GetTensor() { return this->tensor_ptr; }
|
||||
|
||||
size_t GetSlot() { return this->slot; }
|
||||
|
||||
int GetExecutionOrder() { return this->execution_order; }
|
||||
|
@ -55,9 +188,179 @@ class TensorData {
|
|||
|
||||
void SetName(const std::string &name) { this->name = name; }
|
||||
|
||||
#ifdef ONLINE_DBG_MODE
|
||||
void SetTensor(mindspore::tensor::TensorPtr out_tensor) { this->tensor_ptr = out_tensor; }
|
||||
#endif
|
||||
|
||||
void SetSlot(size_t slot) { this->slot = slot; }
|
||||
|
||||
char *GetDataPtr() { return data_ptr; }
|
||||
|
||||
void SetDataPtr(char *data_ptr) { this->data_ptr = data_ptr; }
|
||||
|
||||
uint32_t GetNumElements() { return size / data_type_size; }
|
||||
|
||||
uint64_t GetByteSize() { return size; }
|
||||
|
||||
void SetByteSize(uint64_t size) { this->size = size; }
|
||||
|
||||
std::vector<int64_t> GetShape() { return shape; }
|
||||
|
||||
void SetShape(std::vector<int64_t> shape) { this->shape = shape; }
|
||||
|
||||
unsigned int GetIteration() { return iteration; }
|
||||
|
||||
void SetIteration(unsigned int iteration) { this->iteration = iteration; }
|
||||
|
||||
unsigned int GetDeviceId() { return device_id; }
|
||||
|
||||
void SetDeviceId(unsigned int device_id) { this->device_id = device_id; }
|
||||
|
||||
unsigned int GetRootGraphId() { return root_graph_id; }
|
||||
|
||||
void SetRootGraphId(unsigned int root_graph_id) { this->root_graph_id = root_graph_id; }
|
||||
|
||||
DbgDataType GetType() { return data_type; }
|
||||
|
||||
void SetType(unsigned int type) { ConvertMsToDbgType(type); }
|
||||
|
||||
void SetType(std::string type_name) { ConvertStringToDbgType(type_name); }
|
||||
|
||||
void ConvertMsToDbgType(uint32_t type) {
|
||||
switch (type) {
|
||||
case MsTypeId::kNumberTypeBool:
|
||||
this->data_type = DbgDataType::DT_BOOL;
|
||||
this->data_type_size = 1;
|
||||
break;
|
||||
case MsTypeId::kNumberTypeInt8:
|
||||
this->data_type = DbgDataType::DT_INT8;
|
||||
this->data_type_size = 1;
|
||||
break;
|
||||
case MsTypeId::kNumberTypeInt16:
|
||||
this->data_type = DbgDataType::DT_INT16;
|
||||
this->data_type_size = 2;
|
||||
break;
|
||||
case MsTypeId::kNumberTypeInt32:
|
||||
this->data_type = DbgDataType::DT_INT32;
|
||||
this->data_type_size = 4;
|
||||
break;
|
||||
case MsTypeId::kNumberTypeInt64:
|
||||
this->data_type = DbgDataType::DT_INT64;
|
||||
this->data_type_size = 8;
|
||||
break;
|
||||
case MsTypeId::kNumberTypeUInt8:
|
||||
this->data_type = DbgDataType::DT_UINT8;
|
||||
this->data_type_size = 1;
|
||||
break;
|
||||
case MsTypeId::kNumberTypeUInt16:
|
||||
this->data_type = DbgDataType::DT_UINT16;
|
||||
this->data_type_size = 2;
|
||||
break;
|
||||
case MsTypeId::kNumberTypeUInt32:
|
||||
this->data_type = DbgDataType::DT_UINT32;
|
||||
this->data_type_size = 4;
|
||||
break;
|
||||
case MsTypeId::kNumberTypeUInt64:
|
||||
this->data_type = DbgDataType::DT_UINT64;
|
||||
this->data_type_size = 8;
|
||||
break;
|
||||
case MsTypeId::kNumberTypeFloat16:
|
||||
this->data_type = DbgDataType::DT_FLOAT16;
|
||||
this->data_type_size = 2;
|
||||
break;
|
||||
case MsTypeId::kNumberTypeFloat32:
|
||||
this->data_type = DbgDataType::DT_FLOAT32;
|
||||
this->data_type_size = 4;
|
||||
break;
|
||||
case MsTypeId::kNumberTypeFloat64:
|
||||
this->data_type = DbgDataType::DT_FLOAT64;
|
||||
this->data_type_size = 8;
|
||||
break;
|
||||
case MsTypeId::kNumberTypeInt:
|
||||
this->data_type = DbgDataType::DT_BASE_INT;
|
||||
this->data_type_size = 4;
|
||||
break;
|
||||
case MsTypeId::kNumberTypeUInt:
|
||||
this->data_type = DbgDataType::DT_BASE_UINT;
|
||||
this->data_type_size = 4;
|
||||
break;
|
||||
case MsTypeId::kNumberTypeFloat:
|
||||
this->data_type = DbgDataType::DT_BASE_FLOAT;
|
||||
this->data_type_size = 4;
|
||||
break;
|
||||
default:
|
||||
MS_LOG(EXCEPTION) << "Unexpected type id: " << type;
|
||||
}
|
||||
}
|
||||
|
||||
void ConvertStringToDbgType(const std::string &type_name) {
|
||||
std::string type_name_lower = type_name;
|
||||
std::string trans_true_prefix = "kNumberType";
|
||||
if (type_name.find(trans_true_prefix) == 0) {
|
||||
type_name_lower = type_name.substr(trans_true_prefix.length());
|
||||
}
|
||||
(void)std::transform(type_name_lower.begin(), type_name_lower.end(), type_name_lower.begin(), ::tolower);
|
||||
if (type_name_lower == "bool") {
|
||||
this->data_type = DbgDataType::DT_BOOL;
|
||||
this->data_type_size = 1;
|
||||
} else if (type_name_lower == "int8") {
|
||||
this->data_type = DbgDataType::DT_INT8;
|
||||
this->data_type_size = 1;
|
||||
} else if (type_name_lower == "int16") {
|
||||
this->data_type = DbgDataType::DT_INT16;
|
||||
this->data_type_size = 2;
|
||||
} else if (type_name_lower == "int32") {
|
||||
this->data_type = DbgDataType::DT_INT32;
|
||||
this->data_type_size = 4;
|
||||
} else if (type_name_lower == "int64") {
|
||||
this->data_type = DbgDataType::DT_INT64;
|
||||
this->data_type_size = 8;
|
||||
} else if (type_name_lower == "uint8") {
|
||||
this->data_type = DbgDataType::DT_UINT8;
|
||||
this->data_type_size = 1;
|
||||
} else if (type_name_lower == "uint16") {
|
||||
this->data_type = DbgDataType::DT_UINT16;
|
||||
this->data_type_size = 2;
|
||||
} else if (type_name_lower == "uint32") {
|
||||
this->data_type = DbgDataType::DT_UINT32;
|
||||
this->data_type_size = 4;
|
||||
} else if (type_name_lower == "uint64") {
|
||||
this->data_type = DbgDataType::DT_UINT64;
|
||||
this->data_type_size = 8;
|
||||
} else if (type_name_lower == "float16") {
|
||||
this->data_type = DbgDataType::DT_FLOAT16;
|
||||
this->data_type_size = 2;
|
||||
} else if (type_name_lower == "float32") {
|
||||
this->data_type = DbgDataType::DT_FLOAT32;
|
||||
this->data_type_size = 4;
|
||||
} else if (type_name_lower == "float64") {
|
||||
this->data_type = DbgDataType::DT_FLOAT64;
|
||||
this->data_type_size = 8;
|
||||
} else if (type_name_lower == "") {
|
||||
this->data_type = DbgDataType::DT_UNDEFINED;
|
||||
this->data_type_size = 0;
|
||||
} else {
|
||||
MS_LOG(EXCEPTION) << "Unexpected type name: " << type_name;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
char *data_ptr; // pointer to the pre-allocated memory
|
||||
uint64_t size; // size in bytes
|
||||
DbgDataType data_type; // internal debugger type
|
||||
unsigned int data_type_size;
|
||||
std::vector<int64_t> shape;
|
||||
std::string name;
|
||||
uint64_t slot;
|
||||
unsigned int iteration;
|
||||
unsigned int device_id;
|
||||
unsigned int root_graph_id;
|
||||
int execution_order;
|
||||
#ifdef ONLINE_DBG_MODE
|
||||
mindspore::tensor::TensorPtr tensor_ptr;
|
||||
#endif
|
||||
};
|
||||
#ifdef ONLINE_DBG_MODE
|
||||
} // namespace mindspore
|
||||
#endif
|
||||
#endif // MINDSPORE_CCSRC_DEBUG_TENSOR_DATA_H_
|
||||
|
|
|
@ -23,10 +23,14 @@
|
|||
#include <tuple>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#ifdef OFFLINE_DBG_MODE
|
||||
#include "debugger/offline_debug/offline_logger.h"
|
||||
#endif
|
||||
#include "debug/tensor_data.h"
|
||||
#ifdef ONLINE_DBG_MODE
|
||||
#include "debug/data_dump/dump_json_parser.h"
|
||||
#include "ir/dtype.h"
|
||||
namespace mindspore {
|
||||
#endif
|
||||
class TensorLoader {
|
||||
public:
|
||||
TensorLoader() : iter_num(-1) {}
|
||||
|
@ -152,9 +156,10 @@ class TensorLoader {
|
|||
|
||||
void set_iter_num(uint32_t iter_num) { this->iter_num = iter_num; }
|
||||
|
||||
#ifdef ONLINE_DBG_MODE
|
||||
bool DumpTensorToFile(const std::string &tensor_name, bool trans_flag, const std::string &filepath,
|
||||
const std::string &host_fmt, const std::vector<int64_t> &host_shape, TypeId host_type,
|
||||
TypeId addr_type_id, const std::string &addr_format, size_t slot) const {
|
||||
TypeId addr_type_id, const std::string &addr_format, size_t slot) {
|
||||
if (filepath.empty()) {
|
||||
MS_LOG(ERROR) << "Dump file path is null!";
|
||||
return false;
|
||||
|
@ -181,21 +186,24 @@ class TensorLoader {
|
|||
auto iter = tensor_list_map.find(tensor_loader_name);
|
||||
if (iter != tensor_list_map.end()) {
|
||||
std::shared_ptr<TensorData> node = iter->second;
|
||||
mindspore::tensor::TensorPtr out_tensor = node->GetTensor();
|
||||
size_t host_size = out_tensor->data().nbytes();
|
||||
size_t host_size = node->GetByteSize();
|
||||
|
||||
return DumpJsonParser::DumpToFile(path, out_tensor->data_c(), host_size);
|
||||
return DumpJsonParser::DumpToFile(path, node->GetDataPtr(), host_size);
|
||||
}
|
||||
MS_LOG(INFO) << "Tensor name:" << tensor_name << " not found in tensor_list_map";
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
private:
|
||||
// the pair is (device_id, iteration)
|
||||
std::map<std::string, std::shared_ptr<TensorData>> tensor_list_map;
|
||||
std::multimap<std::string, std::shared_ptr<TensorData>> node_tensor_map;
|
||||
std::map<std::string, std::shared_ptr<TensorData>> prev_tensor_list_map;
|
||||
uint32_t iter_num;
|
||||
std::mutex lock_;
|
||||
};
|
||||
#ifdef ONLINE_DBG_MODE
|
||||
} // namespace mindspore
|
||||
#endif
|
||||
#endif // MINDSPORE_CCSRC_DEBUG_TENSOR_LOAD_H_
|
||||
|
|
|
@ -713,6 +713,10 @@ bool AscendDeviceAddress::LoadMemToHost(const std::string &tensor_name, int exec
|
|||
}
|
||||
MS_LOG(INFO) << "E2E tensor name is " << tensor_name;
|
||||
tensor_data->SetTensor(out_tensor);
|
||||
tensor_data->SetDataPtr(static_cast<char *>(out_tensor->data_c()));
|
||||
tensor_data->SetByteSize(out_tensor->data().nbytes());
|
||||
tensor_data->SetType((unsigned int)host_type);
|
||||
tensor_data->SetShape(out_tensor->shape());
|
||||
ret = Debugger::GetInstance()->LoadNewTensor(tensor_data, keep_prev);
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -93,7 +93,7 @@ void GPUDeviceAddress::ClearDeviceMemory() {
|
|||
}
|
||||
|
||||
GPUDeviceAddress::~GPUDeviceAddress() { ClearDeviceMemory(); }
|
||||
#ifdef ENABLE_DEBUGGER
|
||||
|
||||
bool GPUDeviceAddress::LoadMemToHost(const std::string &tensor_name, int execution_order, const std::string &host_fmt,
|
||||
const ShapeVector &host_shape, TypeId host_type, size_t slot,
|
||||
bool keep_prev) const {
|
||||
|
@ -117,13 +117,16 @@ bool GPUDeviceAddress::LoadMemToHost(const std::string &tensor_name, int executi
|
|||
auto tensor_data = std::make_shared<mindspore::TensorData>();
|
||||
tensor_data->SetName(tensor_name);
|
||||
tensor_data->SetExecutionOrder(execution_order);
|
||||
tensor_data->SetTensor(out_tensor);
|
||||
tensor_data->SetSlot(slot);
|
||||
tensor_data->SetTensor(out_tensor);
|
||||
tensor_data->SetDataPtr(static_cast<char *>(out_tensor->data_c()));
|
||||
tensor_data->SetByteSize(out_tensor->data().nbytes());
|
||||
tensor_data->SetType((unsigned int)host_type);
|
||||
tensor_data->SetShape(out_tensor->shape());
|
||||
ret = Debugger::GetInstance()->LoadNewTensor(tensor_data, keep_prev);
|
||||
MS_LOG(INFO) << "E2E tensor name is " << tensor_name;
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
} // namespace gpu
|
||||
} // namespace device
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -114,32 +114,33 @@ static int GetSlogLevel(MsLogLevel level) {
|
|||
|
||||
static const char *GetSubModuleName(SubModuleId module_id) {
|
||||
static const char *sub_module_names[NUM_SUBMODUES] = {
|
||||
"UNKNOWN", // SM_UNKNOWN
|
||||
"CORE", // SM_CORE
|
||||
"ANALYZER", // SM_ANALYZER
|
||||
"COMMON", // SM_COMMON
|
||||
"DEBUG", // SM_DEBUG
|
||||
"DEVICE", // SM_DEVICE
|
||||
"GE_ADPT", // SM_GE_ADPT
|
||||
"IR", // SM_IR
|
||||
"KERNEL", // SM_KERNEL
|
||||
"MD", // SM_MD
|
||||
"ME", // SM_ME
|
||||
"EXPRESS", // SM_EXPRESS
|
||||
"OPTIMIZER", // SM_OPTIMIZER
|
||||
"PARALLEL", // SM_PARALLEL
|
||||
"PARSER", // SM_PARSER
|
||||
"PIPELINE", // SM_PIPELINE
|
||||
"PRE_ACT", // SM_PRE_ACT
|
||||
"PYNATIVE", // SM_PYNATIVE
|
||||
"SESSION", // SM_SESSION
|
||||
"UTILS", // SM_UTILS
|
||||
"VM", // SM_VM
|
||||
"PROFILER", // SM_PROFILER
|
||||
"PS", // SM_PS
|
||||
"LITE", // SM_LITE
|
||||
"HCCL_ADPT", // SM_HCCL_ADPT
|
||||
"MINDQUANTUM" // SM_MINDQUANTUM
|
||||
"UNKNOWN", // SM_UNKNOWN
|
||||
"CORE", // SM_CORE
|
||||
"ANALYZER", // SM_ANALYZER
|
||||
"COMMON", // SM_COMMON
|
||||
"DEBUG", // SM_DEBUG
|
||||
"OFFLINE_DEBUG", // SM_OFFLINE_DEBUG
|
||||
"DEVICE", // SM_DEVICE
|
||||
"GE_ADPT", // SM_GE_ADPT
|
||||
"IR", // SM_IR
|
||||
"KERNEL", // SM_KERNEL
|
||||
"MD", // SM_MD
|
||||
"ME", // SM_ME
|
||||
"EXPRESS", // SM_EXPRESS
|
||||
"OPTIMIZER", // SM_OPTIMIZER
|
||||
"PARALLEL", // SM_PARALLEL
|
||||
"PARSER", // SM_PARSER
|
||||
"PIPELINE", // SM_PIPELINE
|
||||
"PRE_ACT", // SM_PRE_ACT
|
||||
"PYNATIVE", // SM_PYNATIVE
|
||||
"SESSION", // SM_SESSION
|
||||
"UTILS", // SM_UTILS
|
||||
"VM", // SM_VM
|
||||
"PROFILER", // SM_PROFILER
|
||||
"PS", // SM_PS
|
||||
"LITE", // SM_LITE
|
||||
"HCCL_ADPT", // SM_HCCL_ADPT
|
||||
"MINDQUANTUM" // SM_MINDQUANTUM
|
||||
};
|
||||
|
||||
return sub_module_names[module_id % NUM_SUBMODUES];
|
||||
|
|
|
@ -111,6 +111,7 @@ enum SubModuleId : int {
|
|||
SM_ANALYZER, // static analyzer
|
||||
SM_COMMON, // common
|
||||
SM_DEBUG, // debug
|
||||
SM_OFFLINE_DEBUG, // offline debug
|
||||
SM_DEVICE, // device
|
||||
SM_GE_ADPT, // ge adapter
|
||||
SM_IR, // IR
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
This module provides APIs to load and process dump data, i.e. read tensors, check
|
||||
for watchpoints and other debugging services.
|
||||
"""
|
||||
|
||||
from . import dbg_services
|
||||
from . import mi_validator_helpers
|
||||
from . import mi_validators
|
|
@ -0,0 +1,870 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""
|
||||
The module DbgServices provides offline debugger APIs.
|
||||
"""
|
||||
|
||||
import mindspore._mindspore_offline_debug as cds
|
||||
from mindspore.offline_debug.mi_validators import check_init, check_initialize, check_add_watchpoint, check_remove_watchpoint, check_check_watchpoints, check_read_tensors, check_initialize_done, check_tensor_info_init, check_tensor_data_init, check_watchpoint_hit_init, check_parameter_init
|
||||
|
||||
|
||||
def get_version():
    """
    Return the version string of the offline Debug Services backend.

    Returns:
        version (str): dbgServices version.

    Examples:
        >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
        >>> version = dbg_services.get_version()
    """
    # A throwaway non-verbose backend instance is enough to query the version.
    backend = cds.DbgServices(False)
    return backend.GetVersion()
|
||||
|
||||
class DbgLogger:
    """
    Lightweight logger for the offline Debug Services.

    Args:
        verbose (bool): whether to print logs.

    Examples:
        >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
        >>> version = dbg_services.DbgLogger(verbose=False)
    """

    def __init__(self, verbose):
        # A single flag gates all output; callers may flip it at any time.
        self.verbose = verbose

    def __call__(self, *logs):
        # Arguments are printed as the raw tuple, exactly like print(logs).
        if not self.verbose:
            return
        print(logs)


# Module-level logger, silent by default; DbgServices.__init__ sets verbosity.
log = DbgLogger(False)
|
||||
|
||||
|
||||
class DbgServices():
|
||||
"""
|
||||
Offline Debug Services class.
|
||||
|
||||
Args:
|
||||
dump_file_path (str): directory where the dump files are saved.
|
||||
        verbose (bool): whether to print logs (default: False).
|
||||
|
||||
Examples:
|
||||
>>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
|
||||
>>> d = dbg_services.DbgServices(dump_file_path="dump_file_path",
|
||||
>>> verbose=True)
|
||||
"""
|
||||
|
||||
@check_init
|
||||
def __init__(self, dump_file_path, verbose=False):
|
||||
log.verbose = verbose
|
||||
log("in Python __init__, file path is ", dump_file_path)
|
||||
self.dump_file_path = dump_file_path
|
||||
self.dbg_instance = cds.DbgServices(verbose)
|
||||
self.version = self.dbg_instance.GetVersion()
|
||||
self.verbose = verbose
|
||||
self.initialized = False
|
||||
|
||||
@check_initialize
|
||||
def initialize(self, net_name, is_sync_mode=True):
|
||||
"""
|
||||
Initialize Debug Service.
|
||||
|
||||
Args:
|
||||
net_name (str): Network name.
|
||||
is_sync_mode (bool): Whether to process synchronous or asynchronous dump files mode
|
||||
(default: True (synchronous)).
|
||||
|
||||
Returns:
|
||||
Initialized Debug Service instance.
|
||||
|
||||
Examples:
|
||||
>>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
|
||||
>>> d = dbg_services.DbgServices(dump_file_path="dump_file_path",
|
||||
>>> verbose=True)
|
||||
>>> d_init = d.initialize(net_name="network name", is_sync_mode=True)
|
||||
"""
|
||||
|
||||
log("in Python Initialize dump_file_path ", self.dump_file_path)
|
||||
self.initialized = True
|
||||
return self.dbg_instance.Initialize(net_name, self.dump_file_path, is_sync_mode)
|
||||
|
||||
@check_initialize_done
|
||||
@check_add_watchpoint
|
||||
def add_watchpoint(self, watchpoint_id, watch_condition, check_node_list, parameter_list):
|
||||
"""
|
||||
Adding watchpoint to Debug Service instance.
|
||||
|
||||
Args:
|
||||
watchpoint_id (int): Watchpoint id
|
||||
watch_condition (int): A representation of the condition to be checked.
|
||||
check_node_list (dict): Dictionary of node names (str or '*' to check all nodes) as key,
|
||||
mapping to device_id (list of ints or '*' to check all devices),
|
||||
root_graph_id (list of ints or '*' to check all graphs) and is_parameter (bool).
|
||||
parameter_list (list): List of parameters in watchpoint. Parameters should be instances of Parameter class.
|
||||
Each parameter describes the value to be checked in watchpoint.
|
||||
|
||||
Returns:
|
||||
Debug Service instance with added watchpoint.
|
||||
|
||||
Examples:
|
||||
>>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
|
||||
>>> d = dbg_services.DbgServices(dump_file_path="dump_file_path",
|
||||
>>> verbose=True)
|
||||
>>> d_init = d.initialize(is_sync_mode=True)
|
||||
>>> d_wp = d_init.add_watchpoint(watchpoint_id=1,
|
||||
>>> watch_condition=6,
|
||||
>>> check_node_list={"conv2.bias" : {"device_id": [0],
|
||||
root_graph_id: [0], "is_parameter": True}},
|
||||
>>> parameter_list=[dbg_services.Parameter(name="param",
|
||||
>>> disabled=False,
|
||||
>>> value=0.0,
|
||||
>>> hit=False,
|
||||
>>> actual_value=0.0)])
|
||||
"""
|
||||
|
||||
log("in Python AddWatchpoint")
|
||||
for node_name, node_info in check_node_list.items():
|
||||
for info_name, info_param in node_info.items():
|
||||
if info_name in ["device_id", "root_graph_id"]:
|
||||
if info_param in ["*"]:
|
||||
check_node_list[node_name][info_name] = ["*"]
|
||||
else:
|
||||
check_node_list[node_name][info_name] = list(map(str, info_param))
|
||||
parameter_list_inst = []
|
||||
for elem in parameter_list:
|
||||
parameter_list_inst.append(elem.instance)
|
||||
return self.dbg_instance.AddWatchpoint(watchpoint_id, watch_condition, check_node_list, parameter_list_inst)
|
||||
|
||||
@check_initialize_done
|
||||
@check_remove_watchpoint
|
||||
def remove_watchpoint(self, watchpoint_id):
|
||||
"""
|
||||
Removing watchpoint from Debug Service instance.
|
||||
|
||||
Args:
|
||||
watchpoint_id (int): Watchpoint id
|
||||
|
||||
Returns:
|
||||
Debug Service instance with removed watchpoint.
|
||||
|
||||
Examples:
|
||||
>>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
|
||||
>>> d = dbg_services.DbgServices(dump_file_path="dump_file_path",
|
||||
>>> verbose=True)
|
||||
>>> d_init = d.initialize(is_sync_mode=True)
|
||||
>>> d_wp = d_init.add_watchpoint(watchpoint_id=1,
|
||||
>>> watch_condition=6,
|
||||
>>> check_node_list={"conv2.bias" : {"device_id": [5],
|
||||
root_graph_id: [0], "is_parameter": True}},
|
||||
>>> parameter_list=[dbg_services.Parameter(name="param",
|
||||
>>> disabled=False,
|
||||
>>> value=0.0,
|
||||
>>> hit=False,
|
||||
>>> actual_value=0.0)])
|
||||
>>> d_wp = d_wp.remove_watchpoint(watchpoint_id=1)
|
||||
"""
|
||||
|
||||
log("in Python Remove Watchpoint id ", watchpoint_id)
|
||||
return self.dbg_instance.RemoveWatchpoint(watchpoint_id)
|
||||
|
||||
@check_initialize_done
|
||||
@check_check_watchpoints
|
||||
def check_watchpoints(self, iteration):
|
||||
"""
|
||||
Checking watchpoint at given iteration.
|
||||
|
||||
Args:
|
||||
iteration (int): Watchpoint check iteration.
|
||||
|
||||
Returns:
|
||||
Watchpoint hit list.
|
||||
|
||||
Examples:
|
||||
>>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
|
||||
>>> d = dbg_services.DbgServices(dump_file_path="dump_file_path",
|
||||
>>> verbose=True)
|
||||
>>> d_init = d.initialize(is_sync_mode=True)
|
||||
>>> d_wp = d_init.add_watchpoint(id=1,
|
||||
>>> watch_condition=6,
|
||||
>>> check_node_list={"conv2.bias" : {"device_id": [5],
|
||||
root_graph_id: [0], "is_parameter": True}},
|
||||
>>> parameter_list=[dbg_services.Parameter(name="param",
|
||||
>>> disabled=False,
|
||||
>>> value=0.0,
|
||||
>>> hit=False,
|
||||
>>> actual_value=0.0)])
|
||||
>>> watchpoints = d_wp.check_watchpoints(iteration=8)
|
||||
"""
|
||||
|
||||
log("in Python CheckWatchpoints iteration ", iteration)
|
||||
watchpoint_list = self.dbg_instance.CheckWatchpoints(iteration)
|
||||
watchpoint_hit_list = []
|
||||
for watchpoint in watchpoint_list:
|
||||
name = watchpoint.get_name()
|
||||
slot = watchpoint.get_slot()
|
||||
condition = watchpoint.get_condition()
|
||||
watchpoint_id = watchpoint.get_watchpoint_id()
|
||||
parameters = watchpoint.get_parameters()
|
||||
error_code = watchpoint.get_error_code()
|
||||
device_id = watchpoint.get_device_id()
|
||||
root_graph_id = watchpoint.get_root_graph_id()
|
||||
param_list = []
|
||||
for param in parameters:
|
||||
p_name = param.get_name()
|
||||
disabled = param.get_disabled()
|
||||
value = param.get_value()
|
||||
hit = param.get_hit()
|
||||
actual_value = param.get_actual_value()
|
||||
param_list.append(Parameter(p_name, disabled, value, hit, actual_value))
|
||||
watchpoint_hit_list.append(WatchpointHit(name, slot, condition, watchpoint_id,
|
||||
param_list, error_code, device_id, root_graph_id))
|
||||
return watchpoint_hit_list
|
||||
|
||||
@check_initialize_done
|
||||
@check_read_tensors
|
||||
def read_tensors(self, info):
|
||||
"""
|
||||
Returning tensor data object describing the tensor requested tensor.
|
||||
|
||||
Args:
|
||||
info (list): List of TensorInfo objects.
|
||||
|
||||
Returns:
|
||||
TensorData list (list).
|
||||
|
||||
Examples:
|
||||
>>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
|
||||
>>> d = dbg_services.DbgServices(dump_file_path="dump_file_path",
|
||||
>>> verbose=True)
|
||||
>>> d_init = d.initialize(is_sync_mode=True)
|
||||
>>> tensor_data_list = d_init.read_tensors([dbg_services.TensorInfo(node_name="conv2.bias",
|
||||
>>> slot=0,
|
||||
>>> iteration=8,
|
||||
>>> device_id=5,
|
||||
>>> root_graph_id=0,
|
||||
>>> is_parameter=True)])
|
||||
"""
|
||||
|
||||
log("in Python ReadTensors info ", info)
|
||||
info_list_inst = []
|
||||
for elem in info:
|
||||
log("in Python ReadTensors info ", info)
|
||||
info_list_inst.append(elem.instance)
|
||||
tensor_data_list = self.dbg_instance.ReadTensors(info_list_inst)
|
||||
tensor_data_list_ret = []
|
||||
for elem in tensor_data_list:
|
||||
if elem.get_data_size() == 0:
|
||||
tensor_data = TensorData(b'', elem.get_data_size(), elem.get_dtype(), elem.get_shape())
|
||||
else:
|
||||
tensor_data = TensorData(elem.get_data_ptr(), elem.get_data_size(), elem.get_dtype(), elem.get_shape())
|
||||
tensor_data_list_ret.append(tensor_data)
|
||||
return tensor_data_list_ret
|
||||
|
||||
class TensorInfo():
    """
    Tensor Information class.

    Args:
        node_name (str): Fully qualified name of the desired node.
        slot (int): The particular output for the requested node.
        iteration (int): The desired iteration to gather tensor information.
        device_id (int): The desired device id to gather tensor information.
        root_graph_id (int): The root graph id to gather tensor information.
        is_parameter (bool): Whether node is a parameter (input, constant, bias, parameter).

    Examples:
        >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
        >>> tensor_info = dbg_services.TensorInfo(node_name="conv2.bias",
        >>>                                       slot=0,
        >>>                                       iteration=8,
        >>>                                       device_id=5,
        >>>                                       root_graph_id=0,
        >>>                                       is_parameter=True)
    """

    @check_tensor_info_init
    def __init__(self, node_name, slot, iteration, device_id, root_graph_id, is_parameter):
        # Arguments are validated by check_tensor_info_init before the
        # backend object is constructed.
        self.instance = cds.tensor_info(node_name, slot, iteration, device_id, root_graph_id, is_parameter)

    @property
    def node_name(self):
        """node_name of TensorInfo instance (str)."""
        return self.instance.get_node_name()

    @property
    def slot(self):
        """slot of TensorInfo instance (int)."""
        return self.instance.get_slot()

    @property
    def iteration(self):
        """iteration of TensorInfo instance (int)."""
        return self.instance.get_iteration()

    @property
    def device_id(self):
        """device_id of TensorInfo instance (int)."""
        # BUG FIX: the original property had no return statement and always
        # yielded None; forward to the backend accessor like the other getters.
        return self.instance.get_device_id()

    @property
    def root_graph_id(self):
        """root_graph_id of TensorInfo instance (int)."""
        return self.instance.get_root_graph_id()

    @property
    def is_parameter(self):
        """is_parameter of TensorInfo instance (bool)."""
        return self.instance.get_is_parameter()
|
||||
|
||||
class TensorData():
    """
    TensorData class: Python wrapper around a backend tensor-data object.

    Args:
        data_ptr (byte): Data pointer.
        data_size (int): Size of data in bytes.
        dtype (int): An encoding representing the type of TensorData.
        shape (list): Shape of tensor.

    Examples:
        >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
        >>> tensor_data = dbg_services.TensorData(data_ptr=b'\xba\xd0\xba\xd0',
        >>>                                       data_size=4,
        >>>                                       dtype=0,
        >>>                                       shape=[2, 2])
    """

    @check_tensor_data_init
    def __init__(self, data_ptr, data_size, dtype, shape):
        # Arguments are validated by check_tensor_data_init before the
        # backend object is constructed.
        self.instance = cds.tensor_data(data_ptr, data_size, dtype, shape)

    @property
    def data_ptr(self):
        """data_ptr of TensorData instance (byte)."""
        return self.instance.get_data_ptr()

    @property
    def data_size(self):
        """data_size of TensorData instance (int)."""
        return self.instance.get_data_size()

    @property
    def dtype(self):
        """dtype of TensorData instance (int)."""
        return self.instance.get_dtype()

    @property
    def shape(self):
        """shape of TensorData instance (list)."""
        return self.instance.get_shape()
|
||||
|
||||
class WatchpointHit():
    """
    WatchpointHit class: a single watchpoint hit reported by the backend.

    Args:
        name (str): Name of WatchpointHit instance.
        slot (int): The numerical label of an output.
        condition (int): A representation of the condition to be checked.
        watchpoint_id (int): Watchpoint id.
        parameters (list): Parameter instances associated with this hit.
        error_code (int): An explanation of certain scenarios where watchpoint could not be checked.
        device_id (int): Device id where the watchpoint is hit.
        root_graph_id (int): Root graph id where the watchpoint is hit.

    Examples:
        >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
        >>> watchpoint_hit = dbg_services.WatchpointHit(name="hit1",
        >>>                                             slot=1,
        >>>                                             condition=2,
        >>>                                             watchpoint_id=3,
        >>>                                             parameters=[param1, param2],
        >>>                                             error_code=0,
        >>>                                             device_id=1,
        >>>                                             root_graph_id=1)
    """

    @check_watchpoint_hit_init
    def __init__(self, name, slot, condition, watchpoint_id, parameters, error_code, device_id, root_graph_id):
        # Unwrap the Python Parameter wrappers into backend parameter objects.
        unwrapped = [elem.instance for elem in parameters]
        self.instance = cds.watchpoint_hit(name, slot, condition, watchpoint_id,
                                           unwrapped, error_code, device_id, root_graph_id)

    @property
    def name(self):
        """name of WatchpointHit instance (str)."""
        return self.instance.get_name()

    @property
    def slot(self):
        """slot of WatchpointHit instance (int)."""
        return self.instance.get_slot()

    @property
    def condition(self):
        """condition of WatchpointHit instance (int)."""
        return self.instance.get_condition()

    @property
    def watchpoint_id(self):
        """watchpoint_id of WatchpointHit instance (int)."""
        return self.instance.get_watchpoint_id()

    @property
    def parameters(self):
        """Parameters of this hit, re-wrapped as Python Parameter instances (list)."""
        return [Parameter(p.get_name(),
                          p.get_disabled(),
                          p.get_value(),
                          p.get_hit(),
                          p.get_actual_value())
                for p in self.instance.get_parameters()]

    @property
    def error_code(self):
        """error_code of WatchpointHit instance (int)."""
        return self.instance.get_error_code()

    @property
    def device_id(self):
        """device_id of WatchpointHit instance (int)."""
        return self.instance.get_device_id()

    @property
    def root_graph_id(self):
        """root_graph_id of WatchpointHit instance (int)."""
        return self.instance.get_root_graph_id()
|
||||
|
||||
class Parameter():
    """
    Parameter class: a named threshold checked by a watchpoint.

    Args:
        name (str): Name of the parameter.
        disabled (bool): Whether parameter is used in backend.
        value (float): Threshold value of the parameter.
        hit (bool): Whether this parameter triggered watchpoint (default is False).
        actual_value (float): Actual value of the parameter (default is 0.0).

    Examples:
        >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
        >>> parameter = dbg_services.Parameter(name="param",
        >>>                                    disabled=False,
        >>>                                    value=0.0,
        >>>                                    hit=False,
        >>>                                    actual_value=0.0)
    """

    @check_parameter_init
    def __init__(self, name, disabled, value, hit=False, actual_value=0.0):
        # Arguments are validated by check_parameter_init before the backend
        # object is constructed.
        self.instance = cds.parameter(name, disabled, value, hit, actual_value)

    @property
    def name(self):
        """name of Parameter instance (str), e.g. ``parameter.name``."""
        return self.instance.get_name()

    @property
    def disabled(self):
        """disabled of Parameter instance (bool), e.g. ``parameter.disabled``."""
        return self.instance.get_disabled()

    @property
    def value(self):
        """value of Parameter instance (float), e.g. ``parameter.value``."""
        return self.instance.get_value()

    @property
    def hit(self):
        """hit of Parameter instance (bool), e.g. ``parameter.hit``."""
        return self.instance.get_hit()

    @property
    def actual_value(self):
        """actual_value of Parameter instance (float), e.g. ``parameter.actual_value``."""
        return self.instance.get_actual_value()
|
|
@ -0,0 +1,123 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""
|
||||
General Validator Helper Functions.
|
||||
"""
|
||||
import os
|
||||
import inspect
|
||||
|
||||
# Inclusive bounds used by the unsigned-integer range validators below.
UINT32_MAX = 4294967295
UINT32_MIN = 0
UINT64_MAX = 18446744073709551615
UINT64_MIN = 0
|
||||
|
||||
|
||||
def pad_arg_name(arg_name):
    """Append a trailing space to a non-empty argument name so it reads well in error messages."""
    return arg_name + " " if arg_name != "" else arg_name
|
||||
|
||||
|
||||
def check_value(arg, valid_range, arg_name=""):
    """Raise ValueError when ``arg`` falls outside the inclusive interval ``valid_range``."""
    arg_name = pad_arg_name(arg_name)
    low, high = valid_range[0], valid_range[1]
    if not low <= arg <= high:
        raise ValueError(
            "Input {0}is not within the required interval of ({1} to {2}).".format(arg_name,
                                                                                   low, high))
|
||||
|
||||
|
||||
def check_uint32(arg, arg_name=""):
    """
    Validate that ``arg`` is an int within the uint32 range.

    Args:
        arg (int): Value to validate (bools are rejected).
        arg_name (str): Name used in error messages.

    Raises:
        TypeError: If ``arg`` is not an int.
        ValueError: If ``arg`` is outside [0, 2**32 - 1].
    """
    type_check(arg, (int,), arg_name)
    # BUG FIX: forward arg_name so the range error identifies the offending
    # argument (the original dropped it, producing an anonymous message).
    check_value(arg, [UINT32_MIN, UINT32_MAX], arg_name)
|
||||
|
||||
|
||||
def check_uint64(arg, arg_name=""):
    """
    Validate that ``arg`` is an int within the uint64 range.

    Args:
        arg (int): Value to validate (bools are rejected).
        arg_name (str): Name used in error messages.

    Raises:
        TypeError: If ``arg`` is not an int.
        ValueError: If ``arg`` is outside [0, 2**64 - 1].
    """
    type_check(arg, (int,), arg_name)
    # BUG FIX: forward arg_name so the range error identifies the offending
    # argument (the original dropped it, producing an anonymous message).
    check_value(arg, [UINT64_MIN, UINT64_MAX], arg_name)
|
||||
|
||||
|
||||
def check_dir(dataset_dir):
    """Raise ValueError unless ``dataset_dir`` is an existing, readable directory."""
    is_readable_dir = os.path.isdir(dataset_dir) and os.access(dataset_dir, os.R_OK)
    if not is_readable_dir:
        raise ValueError("The folder {} does not exist or permission denied!".format(dataset_dir))
|
||||
|
||||
|
||||
def parse_user_args(method, *args, **kwargs):
    """
    Parse user arguments in a function.

    Args:
        method (method): a callable function.
        args: user passed args.
        kwargs: user passed kwargs.

    Returns:
        user_filled_args (list): values of what the user passed in for the arguments.
        ba.arguments (Ordered Dict): ordered dict of parameter and argument for what the user has passed.
    """
    sig = inspect.signature(method)
    if 'self' in sig.parameters or 'cls' in sig.parameters:
        # Bound-method case: `method` itself is bound into the self/cls slot as
        # a placeholder so the remaining *args line up with the parameters.
        ba = sig.bind(method, *args, **kwargs)
        ba.apply_defaults()
        params = list(sig.parameters.keys())[1:]  # drop self/cls from the result
    else:
        ba = sig.bind(*args, **kwargs)
        ba.apply_defaults()
        params = list(sig.parameters.keys())

    user_filled_args = [ba.arguments.get(arg_value) for arg_value in params]
    return user_filled_args, ba.arguments
|
||||
|
||||
|
||||
def type_check(arg, types, arg_name):
    """
    Check the type of the parameter.

    Args:
        arg (Any) : any variable.
        types (tuple): tuple of all valid types for arg.
        arg_name (str): the name of arg.

    Returns:
        Exception: when the type is not correct, otherwise nothing.
    """
    # Render the empty string visibly in messages.
    print_value = '\"\"' if repr(arg) == repr('') else arg

    # bool is a subclass of int: reject a bool when int is accepted but bool is not.
    bool_mismatch = int in types and bool not in types and isinstance(arg, bool)
    if bool_mismatch or not isinstance(arg, types):
        raise TypeError("Argument {0} with value {1} is not of type {2}.".format(arg_name, print_value, types))
|
||||
|
||||
|
||||
def type_check_list(args, types, arg_names):
    """
    Check the type of each parameter in the list.

    Args:
        args (Union[list, tuple]): a list or tuple of any variable.
        types (tuple): tuple of all valid types for arg.
        arg_names (Union[list, tuple of str]): the names of args.

    Returns:
        Exception: when the type is not correct, otherwise nothing.
    """
    type_check(args, (list, tuple,), arg_names)
    if isinstance(arg_names, str):
        # A single base name is expanded to name[0], name[1], ...
        arg_names = ["{0}[{1}]".format(arg_names, i) for i in range(len(args))]
    elif len(args) != len(arg_names):
        raise ValueError("List of arguments is not the same length as argument_names.")
    for arg, arg_name in zip(args, arg_names):
        type_check(arg, types, arg_name)
|
|
@ -0,0 +1,231 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""
|
||||
Validator Functions for Offline Debugger APIs.
|
||||
"""
|
||||
from functools import wraps
|
||||
|
||||
import mindspore.offline_debug.dbg_services as cds
|
||||
from mindspore.offline_debug.mi_validator_helpers import parse_user_args, type_check, type_check_list, check_dir, check_uint32, check_uint64
|
||||
|
||||
|
||||
def check_init(method):
    """Wrapper method to check the parameters of DbgServices init."""

    @wraps(method)
    def new_method(self, *args, **kwargs):
        [dump_file_path, verbose], _ = parse_user_args(method, *args, **kwargs)

        # Validate the types first, then confirm the dump directory is readable.
        type_check(dump_file_path, (str,), "dump_file_path")
        type_check(verbose, (bool,), "verbose")
        check_dir(dump_file_path)

        return method(self, *args, **kwargs)

    return new_method
|
||||
|
||||
|
||||
def check_initialize(method):
    """Wrapper method to check the parameters of DbgServices Initialize method."""

    @wraps(method)
    def new_method(self, *args, **kwargs):
        [net_name, is_sync_mode], _ = parse_user_args(method, *args, **kwargs)

        # Both arguments are simple scalars; only their types need validation.
        type_check(net_name, (str,), "net_name")
        type_check(is_sync_mode, (bool,), "is_sync_mode")

        return method(self, *args, **kwargs)

    return new_method
|
||||
|
||||
|
||||
def check_add_watchpoint(method):
    """
    Wrapper method to check the parameters of DbgServices AddWatchpoint.

    Validates the watchpoint id, condition, the per-node info dict
    (device_id / root_graph_id / is_parameter) and the parameter list.
    """

    @wraps(method)
    def new_method(self, *args, **kwargs):
        [id_value, watch_condition, check_node_list, parameter_list], _ = parse_user_args(method, *args, **kwargs)

        check_uint32(id_value, "id")
        check_uint32(watch_condition, "watch_condition")
        type_check(check_node_list, (dict,), "check_node_list")
        for node_name, node_info in check_node_list.items():
            type_check(node_name, (str,), "node_name")
            type_check(node_info, (dict,), "node_info")
            for info_name, info_param in node_info.items():
                type_check(info_name, (str,), "node parameter name")
                # device_id and root_graph_id share the same contract:
                # either the wildcard string '*' or an iterable of uint32 ids.
                # (The original duplicated this branch for each key.)
                if info_name in ["device_id", "root_graph_id"]:
                    if isinstance(info_param, str):
                        if info_param not in ["*"]:
                            raise ValueError("Node parameter {} only accepts '*' as string.".format(info_name))
                    else:
                        for param in info_param:
                            check_uint32(param, info_name)
                elif info_name in ["is_parameter"]:
                    type_check(info_param, (bool,), "is_parameter")
                else:
                    raise ValueError("Node parameter {} is not defined.".format(info_name))
        param_names = ["param_{0}".format(i) for i in range(len(parameter_list))]
        type_check_list(parameter_list, (cds.Parameter,), param_names)

        return method(self, *args, **kwargs)

    return new_method
|
||||
|
||||
|
||||
def check_remove_watchpoint(method):
    """Wrapper method to check the parameters of DbgServices RemoveWatchpoint."""

    @wraps(method)
    def new_method(self, *args, **kwargs):
        [id_value], _ = parse_user_args(method, *args, **kwargs)

        # The watchpoint id must be a valid uint32.
        check_uint32(id_value, "id")

        return method(self, *args, **kwargs)

    return new_method
|
||||
|
||||
|
||||
def check_check_watchpoints(method):
    """Wrapper method to check the parameters of DbgServices CheckWatchpoint."""

    @wraps(method)
    def new_method(self, *args, **kwargs):
        [iteration], _ = parse_user_args(method, *args, **kwargs)

        # The iteration number must be a valid uint32.
        check_uint32(iteration, "iteration")

        return method(self, *args, **kwargs)

    return new_method
|
||||
|
||||
|
||||
def check_read_tensors(method):
    """Wrapper method to check the parameters of DbgServices ReadTensors."""

    @wraps(method)
    def new_method(self, *args, **kwargs):
        [info_list], _ = parse_user_args(method, *args, **kwargs)

        # Every element of the list must be a TensorInfo instance.
        info_names = ["info_{0}".format(i) for i in range(len(info_list))]
        type_check_list(info_list, (cds.TensorInfo,), info_names)

        return method(self, *args, **kwargs)

    return new_method
|
||||
|
||||
|
||||
def check_initialize_done(method):
    """Decorator that ensures DbgServices initialize() was called first.

    Args:
        method: A DbgServices method that requires the backend to be
            initialized.

    Returns:
        The wrapped method, which raises ``RuntimeError`` when invoked on an
        instance whose ``initialized`` flag is still falsy.
    """

    @wraps(method)
    def new_method(self, *args, **kwargs):

        # The debugger backend is only usable after initialize(); guard every
        # other API call behind the instance flag.
        if not self.initialized:
            # Fixed typo in the user-facing message ("Inilize" -> "Initialize").
            raise RuntimeError("Initialize should be called before any other methods of DbgServices!")
        return method(self, *args, **kwargs)

    return new_method
|
||||
|
||||
|
||||
def check_tensor_info_init(method):
    """Decorator that validates the arguments of DbgServices TensorInfo init.

    Checks the node name and parameter flag types and confirms that every
    numeric identifier is an unsigned 32-bit integer.
    """

    @wraps(method)
    def wrapper(self, *args, **kwargs):
        [node_name, slot, iteration, device_id, root_graph_id,
         is_parameter], _ = parse_user_args(method, *args, **kwargs)

        # The fully-qualified graph node name must be a string.
        type_check(node_name, (str,), "node_name")
        # Each identifier is validated in the same order as the signature.
        for value, label in ((slot, "slot"),
                             (iteration, "iteration"),
                             (device_id, "device_id"),
                             (root_graph_id, "root_graph_id")):
            check_uint32(value, label)
        type_check(is_parameter, (bool,), "is_parameter")

        return method(self, *args, **kwargs)

    return wrapper
|
||||
|
||||
|
||||
def check_tensor_data_init(method):
    """Decorator that validates the arguments of DbgServices TensorData init.

    Checks the raw buffer, size, dtype, and shape arguments, and requires the
    declared size to match the actual buffer length.
    """

    @wraps(method)
    def wrapper(self, *args, **kwargs):
        [data_ptr, data_size, dtype, shape], _ = parse_user_args(method, *args, **kwargs)

        type_check(data_ptr, (bytes,), "data_ptr")
        check_uint64(data_size, "data_size")
        type_check(dtype, (int,), "dtype")
        # Every dimension of the shape must be an int; label each for errors.
        dim_labels = ["shape_{0}".format(i) for i in range(len(shape))]
        type_check_list(shape, (int,), dim_labels)

        # The declared size must agree with the buffer actually provided.
        if len(data_ptr) != data_size:
            raise ValueError("data_ptr length ({0}) is not equal to data_size ({1}).".format(len(data_ptr), data_size))

        return method(self, *args, **kwargs)

    return wrapper
|
||||
|
||||
|
||||
def check_watchpoint_hit_init(method):
    """Decorator that validates the arguments of DbgServices WatchpointHit init.

    Checks the node name, condition, error code, parameter list, and every
    unsigned 32-bit identifier of a watchpoint hit record.
    """

    @wraps(method)
    def wrapper(self, *args, **kwargs):
        [name, slot, condition, watchpoint_id,
         parameters, error_code, device_id, root_graph_id], _ = parse_user_args(method, *args, **kwargs)

        type_check(name, (str,), "name")
        check_uint32(slot, "slot")
        type_check(condition, (int,), "condition")
        check_uint32(watchpoint_id, "watchpoint_id")
        # Each entry of the parameter list must be a cds.Parameter instance.
        labels = ["param_{0}".format(i) for i in range(len(parameters))]
        type_check_list(parameters, (cds.Parameter,), labels)
        type_check(error_code, (int,), "error_code")
        check_uint32(device_id, "device_id")
        check_uint32(root_graph_id, "root_graph_id")

        return method(self, *args, **kwargs)

    return wrapper
|
||||
|
||||
|
||||
def check_parameter_init(method):
    """Decorator that validates the arguments of DbgServices Parameter init.

    Enforces the expected Python type for every field of a watchpoint
    parameter before delegating to the wrapped method.
    """

    @wraps(method)
    def wrapper(self, *args, **kwargs):
        [name, disabled, value, hit, actual_value], _ = parse_user_args(method, *args, **kwargs)

        # Validate each field in signature order with its expected type.
        for arg, expected, label in ((name, (str,), "name"),
                                     (disabled, (bool,), "disabled"),
                                     (value, (float,), "value"),
                                     (hit, (bool,), "hit"),
                                     (actual_value, (float,), "actual_value")):
            type_check(arg, expected, label)

        return method(self, *args, **kwargs)

    return wrapper
|
Loading…
Reference in New Issue