forked from mindspore-Ecosystem/mindspore
Fix YOLOv3-tiny multi-root-graph segmentation fault
This commit is contained in:
parent
65dabb58ef
commit
1f4e1d2be7
|
@ -84,45 +84,57 @@ void DebugServices::RemoveWatchpoint(unsigned int id) {
|
|||
|
||||
std::unique_ptr<ITensorSummary> GetSummaryPtr(const std::shared_ptr<TensorData> &tensor,
|
||||
void *const previous_tensor_ptr, uint32_t num_elements,
|
||||
int tensor_dtype) {
|
||||
uint32_t prev_num_elements, int tensor_dtype) {
|
||||
switch (tensor_dtype) {
|
||||
case DbgDataType::DT_UINT8: {
|
||||
return std::make_unique<TensorSummary<uint8_t>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements);
|
||||
return std::make_unique<TensorSummary<uint8_t>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements,
|
||||
prev_num_elements);
|
||||
}
|
||||
case DbgDataType::DT_INT8: {
|
||||
return std::make_unique<TensorSummary<int8_t>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements);
|
||||
return std::make_unique<TensorSummary<int8_t>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements,
|
||||
prev_num_elements);
|
||||
}
|
||||
case DbgDataType::DT_UINT16: {
|
||||
return std::make_unique<TensorSummary<uint16_t>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements);
|
||||
return std::make_unique<TensorSummary<uint16_t>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements,
|
||||
prev_num_elements);
|
||||
}
|
||||
case DbgDataType::DT_INT16: {
|
||||
return std::make_unique<TensorSummary<int16_t>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements);
|
||||
return std::make_unique<TensorSummary<int16_t>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements,
|
||||
prev_num_elements);
|
||||
}
|
||||
case DbgDataType::DT_UINT32: {
|
||||
return std::make_unique<TensorSummary<uint32_t>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements);
|
||||
return std::make_unique<TensorSummary<uint32_t>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements,
|
||||
prev_num_elements);
|
||||
}
|
||||
case DbgDataType::DT_INT32:
|
||||
case DbgDataType::DT_BASE_INT: {
|
||||
return std::make_unique<TensorSummary<int32_t>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements);
|
||||
return std::make_unique<TensorSummary<int32_t>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements,
|
||||
prev_num_elements);
|
||||
}
|
||||
case DbgDataType::DT_UINT64: {
|
||||
return std::make_unique<TensorSummary<uint64_t>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements);
|
||||
return std::make_unique<TensorSummary<uint64_t>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements,
|
||||
prev_num_elements);
|
||||
}
|
||||
case DbgDataType::DT_INT64: {
|
||||
return std::make_unique<TensorSummary<int64_t>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements);
|
||||
return std::make_unique<TensorSummary<int64_t>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements,
|
||||
prev_num_elements);
|
||||
}
|
||||
case DbgDataType::DT_FLOAT16: {
|
||||
return std::make_unique<TensorSummary<float16>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements);
|
||||
return std::make_unique<TensorSummary<float16>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements,
|
||||
prev_num_elements);
|
||||
}
|
||||
case DbgDataType::DT_FLOAT32:
|
||||
case DbgDataType::DT_BASE_FLOAT: {
|
||||
return std::make_unique<TensorSummary<float>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements);
|
||||
return std::make_unique<TensorSummary<float>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements,
|
||||
prev_num_elements);
|
||||
}
|
||||
case DbgDataType::DT_FLOAT64: {
|
||||
return std::make_unique<TensorSummary<double>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements);
|
||||
return std::make_unique<TensorSummary<double>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements,
|
||||
prev_num_elements);
|
||||
}
|
||||
case DbgDataType::DT_BOOL: {
|
||||
return std::make_unique<TensorSummary<bool>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements);
|
||||
return std::make_unique<TensorSummary<bool>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements,
|
||||
prev_num_elements);
|
||||
}
|
||||
default:
|
||||
MS_LOG(INFO) << "Unsupported tensor type";
|
||||
|
@ -132,7 +144,8 @@ std::unique_ptr<ITensorSummary> GetSummaryPtr(const std::shared_ptr<TensorData>
|
|||
}
|
||||
|
||||
#ifdef OFFLINE_DBG_MODE
|
||||
void *DebugServices::GetPrevTensor(const std::shared_ptr<TensorData> &tensor, bool previous_iter_tensor_needed) {
|
||||
void *DebugServices::GetPrevTensor(const std::shared_ptr<TensorData> &tensor, bool previous_iter_tensor_needed,
|
||||
uint32_t *prev_num_elements) {
|
||||
void *previous_tensor_ptr = nullptr;
|
||||
std::shared_ptr<TensorData> tensor_prev;
|
||||
if (previous_iter_tensor_needed && tensor->GetIteration() >= 1) {
|
||||
|
@ -155,6 +168,7 @@ void *DebugServices::GetPrevTensor(const std::shared_ptr<TensorData> &tensor, bo
|
|||
tensor_prev.reset();
|
||||
} else {
|
||||
previous_tensor_ptr = tensor_prev->GetDataPtr();
|
||||
*prev_num_elements = tensor_prev->GetNumElements();
|
||||
}
|
||||
}
|
||||
return previous_tensor_ptr;
|
||||
|
@ -247,16 +261,21 @@ void DebugServices::CheckWatchpointsForTensor(
|
|||
// no wp set on current tensor
|
||||
if (watchpoints_to_check.empty()) continue;
|
||||
uint32_t num_elements = tensor->GetNumElements();
|
||||
uint32_t prev_num_elements = 0;
|
||||
void *previous_tensor_ptr = nullptr;
|
||||
#ifdef OFFLINE_DBG_MODE
|
||||
void *previous_tensor_ptr = GetPrevTensor(tensor, previous_iter_tensor_needed);
|
||||
previous_tensor_ptr = GetPrevTensor(tensor, previous_iter_tensor_needed, &prev_num_elements);
|
||||
#else
|
||||
void *previous_tensor_ptr =
|
||||
tensor_loader_->GetPrevTensor(tensor_name) ? tensor_loader_->GetPrevTensor(tensor_name)->GetDataPtr() : nullptr;
|
||||
std::shared_ptr<TensorData> prev_tensor_data = tensor_loader_->GetPrevTensor(tensor_name);
|
||||
if (prev_tensor_data) {
|
||||
previous_tensor_ptr = prev_tensor_data->GetDataPtr();
|
||||
prev_num_elements = prev_tensor_data->GetNumElements();
|
||||
}
|
||||
#endif
|
||||
|
||||
std::unique_ptr<ITensorSummary> base_summary_ptr;
|
||||
if (!(watchpoints_to_check.size() == 1 && watchpoints_to_check[0].condition.type == IS_OVERFLOW)) {
|
||||
base_summary_ptr = GetSummaryPtr(tensor, previous_tensor_ptr, num_elements, tensor_dtype);
|
||||
base_summary_ptr = GetSummaryPtr(tensor, previous_tensor_ptr, num_elements, prev_num_elements, tensor_dtype);
|
||||
if (base_summary_ptr != nullptr) {
|
||||
base_summary_ptr->SummarizeTensor(watchpoints_to_check);
|
||||
}
|
||||
|
|
|
@ -238,7 +238,8 @@ class DebugServices {
|
|||
std::vector<std::shared_ptr<TensorData>> ReadNeededDumpedTensors(unsigned int iteration,
|
||||
std::vector<std::string> *async_file_pool);
|
||||
|
||||
void *GetPrevTensor(const std::shared_ptr<TensorData> &tensor, bool previous_iter_tensor_needed);
|
||||
void *GetPrevTensor(const std::shared_ptr<TensorData> &tensor, bool previous_iter_tensor_needed,
|
||||
uint32_t *prev_num_elements);
|
||||
|
||||
void ReadTensorFromNpy(const std::string &file_name, std::string *tensor_type, std::size_t *size,
|
||||
std::vector<int64_t> *shape, std::vector<char> **data_buffer);
|
||||
|
|
|
@ -93,10 +93,12 @@ double VarianceAndMeanCalculator::GetVariance() const {
|
|||
double VarianceAndMeanCalculator::GetStandardDeviation() { return sqrt(GetVariance()); }
|
||||
|
||||
template <typename T>
|
||||
TensorSummary<T>::TensorSummary(void *current_tensor_ptr, void *const previous_tensor_ptr, uint32_t num_elements)
|
||||
TensorSummary<T>::TensorSummary(void *current_tensor_ptr, void *const previous_tensor_ptr, uint32_t num_elements,
|
||||
uint32_t prev_num_elements)
|
||||
: current_tensor_ptr(reinterpret_cast<T *>(current_tensor_ptr)),
|
||||
prev_tensor_ptr(reinterpret_cast<T *>(previous_tensor_ptr)),
|
||||
num_elements(num_elements),
|
||||
prev_num_elements_(prev_num_elements),
|
||||
min(std::numeric_limits<double>::max()),
|
||||
max(std::numeric_limits<double>::lowest()),
|
||||
inf_count(0),
|
||||
|
@ -110,8 +112,14 @@ void TensorSummary<T>::SummarizeTensor(const std::vector<DebugServices::watchpoi
|
|||
InitCalculators(wps);
|
||||
for (size_t i = 0; i < num_elements; ++i) {
|
||||
auto current_value = static_cast<double>(current_tensor_ptr[i]);
|
||||
double previous_value =
|
||||
prev_tensor_ptr ? static_cast<double>(prev_tensor_ptr[i]) : std::numeric_limits<double>::quiet_NaN();
|
||||
double previous_value = std::numeric_limits<double>::quiet_NaN();
|
||||
if (prev_tensor_ptr) {
|
||||
if (num_elements == prev_num_elements_) {
|
||||
previous_value = static_cast<double>(prev_tensor_ptr[i]);
|
||||
} else {
|
||||
MS_LOG(DEBUG) << "Current and previous tensor are not the same size.";
|
||||
}
|
||||
}
|
||||
inf_count += std::isinf(current_value);
|
||||
nan_count += std::isnan(current_value);
|
||||
zero_count += (current_value == 0);
|
||||
|
|
|
@ -99,7 +99,7 @@ class TensorSummary : public ITensorSummary {
|
|||
public:
|
||||
TensorSummary() = default;
|
||||
~TensorSummary() override = default;
|
||||
TensorSummary(void *, void *, uint32_t);
|
||||
TensorSummary(void *, void *, uint32_t, uint32_t);
|
||||
void SummarizeTensor(const std::vector<DebugServices::watchpoint_t> &) override;
|
||||
// returns hit, error_code, parameter_list
|
||||
std::tuple<bool, int, std::vector<DebugServices::parameter_t>> IsWatchpointHit(DebugServices::watchpoint_t) override;
|
||||
|
@ -108,6 +108,7 @@ class TensorSummary : public ITensorSummary {
|
|||
T *current_tensor_ptr;
|
||||
T *prev_tensor_ptr;
|
||||
uint32_t num_elements;
|
||||
uint32_t prev_num_elements_;
|
||||
double min;
|
||||
double max;
|
||||
uint32_t inf_count;
|
||||
|
|
Loading…
Reference in New Issue