!27034 unified runtime control flow: optimize the error info

Merge pull request !27034 from limingqi107/bug_fix4
This commit is contained in:
i-robot 2021-12-01 18:29:31 +00:00 committed by Gitee
commit c0d4e5cf0f
8 changed files with 81 additions and 41 deletions

View File

@ -66,7 +66,7 @@ void DumpAbstractActor(const AbstractActor *actor, std::ofstream &ofs) {
if (actor->output_data_arrows().size() != actor->output_data_nodes().size()) {
MS_LOG(EXCEPTION) << "The size of output data arrows is not equal to the output nodes, arrow num:"
<< actor->output_data_arrows().size() << " node num:" << actor->output_data_nodes().size()
<< " for actor:" << actor->GetAID();
<< " for actor:" << actor->GetAID().Name();
}
if (actor->output_data_arrows().size() > 0) {
ofs << "\t\toutput_data_arrows:" << actor->output_data_arrows().size() << "\n ";

View File

@ -103,8 +103,10 @@ void ControlActor::FetchInput(OpContext<DeviceTensor> *const context) {
for (auto &input_data : data_iter->second) {
MS_EXCEPTION_IF_NULL(input_data);
if (IntToSize(input_data->index_) >= input_device_tensors_.size()) {
MS_LOG(ERROR) << "Invalid index, need:" << input_data->index_ << " current:" << input_device_tensors_.size()
<< " for actor:" << GetAID();
std::string error_info = "Invalid index, need:" + std::to_string(input_data->index_) +
" current:" + std::to_string(input_device_tensors_.size()) +
" for actor:" + GetAID().Name();
SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info);
}
input_device_tensors_[input_data->index_] = input_data->data_;
@ -115,8 +117,10 @@ void ControlActor::FetchInput(OpContext<DeviceTensor> *const context) {
for (auto &local_device_tensor : local_device_tensors_) {
MS_EXCEPTION_IF_NULL(local_device_tensor.second);
if (local_device_tensor.first >= input_device_tensors_.size()) {
MS_LOG(ERROR) << "Invalid local index:" << local_device_tensor.first
<< " current:" << local_device_tensors_.size() << " for actor:" << GetAID();
std::string error_info = "Invalid local index:" + std::to_string(local_device_tensor.first) +
" current:" + std::to_string(local_device_tensors_.size()) +
" for actor:" + GetAID().Name();
SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info);
}
input_device_tensors_[local_device_tensor.first] = local_device_tensor.second;
}
@ -128,12 +132,16 @@ void ControlActor::FetchInput(OpContext<DeviceTensor> *const context) {
auto device_tensor = DeviceTensorStore::GetInstance().Fetch(device_tensor_store_key.second.get(),
device_context->GetDeviceAddressType());
if (device_tensor == nullptr) {
MS_LOG(ERROR) << GetAID() << " get device tensor store failed: " << device_tensor_store_key.second->DebugString();
std::string error_info =
GetAID().Name() + " get device tensor store failed: " + device_tensor_store_key.second->DebugString();
SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info);
}
if (device_tensor_store_key.first >= input_device_tensors_.size()) {
MS_LOG(ERROR) << "The input index is out of range, need:" << device_tensor_store_key.first
<< " current:" << input_device_tensors_.size() << " for actor:" << GetAID();
std::string error_info =
"The input index is out of range, need:" + std::to_string(device_tensor_store_key.first) +
" current:" + std::to_string(input_device_tensors_.size()) + " for actor:" + GetAID().Name();
SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info);
}
input_device_tensors_[device_tensor_store_key.first] = device_tensor;
}
@ -156,8 +164,10 @@ void ControlActor::FetchInput(OpContext<DeviceTensor> *const context) {
for (const auto &input_partial : partial_iter->second) {
MS_EXCEPTION_IF_NULL(input_partial.second->func_graph_);
if (input_partial.first >= input_partials_.size()) {
MS_LOG(ERROR) << "Invalid partial index:" << input_partial.first << " vector size:" << input_partials_.size()
<< " for actor:" << GetAID();
std::string error_info = "Invalid partial index:" + std::to_string(input_partial.first) +
" vector size:" + std::to_string(input_partials_.size()) +
" for actor:" + GetAID().Name();
SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info);
}
input_partials_[input_partial.first] = input_partial.second;
}
@ -165,8 +175,10 @@ void ControlActor::FetchInput(OpContext<DeviceTensor> *const context) {
// Fetch input partial from local partial.
for (const auto &local_partial : local_partials_) {
if (local_partial.first >= input_partials_.size()) {
MS_LOG(ERROR) << "Invalid partial index:" << local_partial.first << " vector size:" << input_partials_.size()
<< " for actor:" << GetAID();
std::string error_info = "Invalid partial index:" + std::to_string(local_partial.first) +
" vector size:" + std::to_string(input_partials_.size()) +
" for actor:" + GetAID().Name();
SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info);
}
MS_EXCEPTION_IF_NULL(local_partial.second);
*(input_partials_[local_partial.first]) = *(local_partial.second);
@ -215,8 +227,9 @@ void ControlActor::SendOutput(OpContext<DeviceTensor> *const context) {
for (const auto &partial_arrow : output_partial_arrows_) {
MS_EXCEPTION_IF_NULL(partial_arrow);
if (IntToSize(partial_arrow->from_output_index_) >= input_partials_.size()) {
MS_LOG(ERROR) << "Invalid partial input:" << partial_arrow->from_output_index_
<< " current:" << input_partials_.size() << " for actor:" << GetAID();
std::string error_info = "Invalid partial input:" + std::to_string(partial_arrow->from_output_index_) +
" current:" + std::to_string(input_partials_.size()) + " for actor:" + GetAID().Name();
SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info);
}
auto output_partial = input_partials_[partial_arrow->from_output_index_];
MS_EXCEPTION_IF_NULL(output_partial->func_graph_);

View File

@ -82,8 +82,10 @@ void EntranceActor::FetchInput(OpContext<DeviceTensor> *const context) {
for (auto &input_data : data_iter->second) {
MS_EXCEPTION_IF_NULL(input_data);
if (IntToSize(input_data->index_) >= input_device_tensors_.size()) {
MS_LOG(ERROR) << "The input index is out of range, need:" << input_data->index_
<< " current:" << input_device_tensors_.size() << " for actor:" << GetAID();
std::string error_info = "The input index is out of range, need:" + std::to_string(input_data->index_) +
" current:" + std::to_string(input_device_tensors_.size()) +
" for actor:" + GetAID().Name();
SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info);
}
MS_EXCEPTION_IF_NULL(input_data->data_);
input_device_tensors_[input_data->index_] = input_data->data_;
@ -98,13 +100,17 @@ void EntranceActor::FetchInput(OpContext<DeviceTensor> *const context) {
// Collect the device tensors.
if (device_tensors.size() + partials.size() != formal_parameters_.size()) {
MS_LOG(ERROR) << "Invalid input num, need:" << formal_parameters_.size()
<< " device tensor num:" << device_tensors.size() << " partial num:" << partials.size();
std::string error_info = "Invalid input num, need:" + std::to_string(formal_parameters_.size()) +
" device tensor num:" + std::to_string(device_tensors.size()) +
" partial num:" + std::to_string(partials.size());
SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info);
}
for (const auto &device_tensor : device_tensors) {
if (device_tensor.first >= input_device_tensors_.size()) {
MS_LOG(ERROR) << "Invalid device tensor index:" << device_tensor.first
<< " vector size:" << input_device_tensors_.size() << " for actor:" << GetAID();
std::string error_info = "Invalid device tensor index:" + std::to_string(device_tensor.first) +
" vector size:" + std::to_string(input_device_tensors_.size()) +
" for actor:" + GetAID().Name();
SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info);
}
input_device_tensors_[device_tensor.first] = device_tensor.second;
}
@ -112,8 +118,9 @@ void EntranceActor::FetchInput(OpContext<DeviceTensor> *const context) {
// Collect the partials.
for (const auto &partial : partials) {
if (partial.first >= input_partials_.size()) {
MS_LOG(ERROR) << "Invalid partial index:" << partial.first << " vector size:" << partials.size()
<< " for actor:" << GetAID();
std::string error_info = "Invalid partial index:" + std::to_string(partial.first) +
" vector size:" + std::to_string(partials.size()) + " for actor:" + GetAID().Name();
SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info);
}
input_partials_[partial.first] = partial.second;
}
@ -126,7 +133,8 @@ void EntranceActor::FetchInput(OpContext<DeviceTensor> *const context) {
}
const auto &data = input_device_tensors_[i];
if (data == nullptr) {
MS_LOG(ERROR) << "Input data index:" << i << " for actor:" << GetAID() << " is empty!";
std::string error_info = "Input data index:" + std::to_string(i) + " for actor:" + GetAID().Name() + " is empty!";
SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info);
}
for (auto &output_data : output_data_by_output_index_[i]) {
MS_EXCEPTION_IF_NULL(output_data);

View File

@ -37,7 +37,7 @@ void ExitActor::Init() {
void ExitActor::FetchInput(OpContext<DeviceTensor> *const context) {
MS_EXCEPTION_IF_NULL(context);
ControlActor::FetchInput(context);
CopyDeviceAddress();
CopyDeviceAddress(context);
auto data_iter = output_branch_data_.find(output_branch_id_);
if (data_iter != output_branch_data_.end()) {
@ -79,8 +79,9 @@ void ExitActor::SendOutput(OpContext<DeviceTensor> *const context) {
for (const auto &partial_arrow : partial_iter->second) {
MS_EXCEPTION_IF_NULL(partial_arrow);
if (IntToSize(partial_arrow->from_output_index_) >= input_partials_.size()) {
MS_LOG(ERROR) << "Invalid partial input:" << partial_arrow->from_output_index_
<< " current:" << input_partials_.size() << " for actor:" << GetAID();
std::string error_info = "Invalid partial input:" + std::to_string(partial_arrow->from_output_index_) +
" current:" + std::to_string(input_partials_.size()) + " for actor:" + GetAID().Name();
SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info);
}
auto output_partial = input_partials_[partial_arrow->from_output_index_];
MS_EXCEPTION_IF_NULL(output_partial->func_graph_);
@ -90,14 +91,17 @@ void ExitActor::SendOutput(OpContext<DeviceTensor> *const context) {
}
}
void ExitActor::CopyDeviceAddress() {
void ExitActor::CopyDeviceAddress(OpContext<DeviceTensor> *const context) {
MS_EXCEPTION_IF_NULL(context);
// If node is not empty, it is the exit of funcgraph, no need to create device address.
if (node_ != nullptr) {
return;
}
if (input_device_tensors_.size() != is_need_copy_device_tensors_.size()) {
MS_LOG(ERROR) << "Invalid input device tensor size:" << input_device_tensors_.size()
<< " need:" << is_need_copy_device_tensors_.size() << " for actor:" << GetAID();
std::string error_info = "Invalid input device tensor size:" + std::to_string(input_device_tensors_.size()) +
" need:" + std::to_string(is_need_copy_device_tensors_.size()) +
" for actor:" + GetAID().Name();
SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info);
}
std::vector<DeviceTensor *> new_device_tensors;

View File

@ -57,7 +57,7 @@ class ExitActor : public ControlActor {
private:
friend class ControlNodeScheduler;
void CopyDeviceAddress();
void CopyDeviceAddress(OpContext<DeviceTensor> *const context);
// Exit actor will send to different actors according to different callers, so the output data, control,
// and partial arrows will have branch.

View File

@ -64,13 +64,17 @@ void GatherActor::SendOutput(OpContext<DeviceTensor> *const context) {
// an real parameter, so the subsequent index needs to be reduced by one.
for (auto &device_tensor : output.device_tensors_) {
if (device_tensor.first == 0) {
MS_LOG(ERROR) << "Invalid device tensor index:" << device_tensor.first << " for actor:" << GetAID();
std::string error_info =
"Invalid device tensor index:" + std::to_string(device_tensor.first) + " for actor:" + GetAID().Name();
SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info);
}
device_tensor.first--;
}
for (auto &partial : output.partials_) {
if (partial.first == 0) {
MS_LOG(ERROR) << "Invalid partial index:" << partial.first << " for actor:" << GetAID();
std::string error_info =
"Invalid partial index:" + std::to_string(partial.first) + " for actor:" + GetAID().Name();
SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info);
}
partial.first--;
}

View File

@ -159,7 +159,8 @@ bool StackActor::CheckRunningCondition(const OpContext<DeviceTensor> *context) c
auto iter = input_branch_ids_.find(context->sequential_num_);
if (iter == input_branch_ids_.end() || iter->second.empty()) {
MS_LOG(ERROR) << "There is no branch id for actor:" << GetAID();
MS_LOG(ERROR) << "There is no branch id for actor:" << GetAID().Name();
return false;
}
size_t branch_id_size = iter->second.size();
if (std::any_of(data_iter->second.begin(), data_iter->second.end(),
@ -179,7 +180,8 @@ bool StackActor::CheckRunningCondition(const OpContext<DeviceTensor> *context) c
auto iter = input_branch_ids_.find(context->sequential_num_);
if (iter == input_branch_ids_.end() || iter->second.empty()) {
MS_LOG(ERROR) << "There is no branch id for actor:" << GetAID();
MS_LOG(ERROR) << "There is no branch id for actor:" << GetAID().Name();
return false;
}
size_t branch_id_size = iter->second.size();
if (std::any_of(partial_iter->second.begin(), partial_iter->second.end(),
@ -195,13 +197,15 @@ void StackActor::FetchInput(OpContext<DeviceTensor> *const context) {
if (input_parameter_data_num_ != 0) {
const auto &data_iter = input_parameter_data_.find(context->sequential_num_);
if (data_iter == input_parameter_data_.end()) {
MS_LOG(ERROR) << "Invalid input for actor:" << GetAID();
std::string error_info = "Invalid input for actor:" + GetAID().Name();
SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info);
}
for (const auto &one_stack : data_iter->second) {
if (one_stack.first >= input_parameter_data_num_ + device_tensor_store_keys_.size() +
local_device_tensors_.size() + input_parameter_partial_num_) {
MS_LOG(ERROR) << "Invalid input index:" << one_stack.first << " need:" << input_parameter_data_num_
<< " for actor:" << GetAID();
std::string error_info = "Invalid input index:" + std::to_string(one_stack.first) +
" need:" + std::to_string(input_parameter_data_num_) + " for actor:" + GetAID().Name();
SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info);
}
input_device_tensors_[one_stack.first] = one_stack.second.top();
}
@ -210,13 +214,16 @@ void StackActor::FetchInput(OpContext<DeviceTensor> *const context) {
if (input_parameter_partial_num_ != 0) {
const auto &partial_iter = input_parameter_partial_.find(context->sequential_num_);
if (partial_iter == input_parameter_partial_.end()) {
MS_LOG(ERROR) << "Invalid input for actor:" << GetAID();
std::string error_info = "Invalid input for actor:" + GetAID().Name();
SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info);
}
for (const auto &one_stack : partial_iter->second) {
if (one_stack.first >= input_parameter_data_num_ + device_tensor_store_keys_.size() +
local_device_tensors_.size() + input_parameter_partial_num_) {
MS_LOG(ERROR) << "Invalid input index:" << one_stack.first << " need:" << input_parameter_partial_
<< " for actor:" << GetAID();
std::string error_info = "Invalid input index:" + std::to_string(one_stack.first) +
" need:" + std::to_string(input_parameter_partial_num_) +
" for actor:" + GetAID().Name();
SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info);
}
input_partials_[one_stack.first] = one_stack.second.top();
}
@ -232,11 +239,13 @@ void StackActor::EraseInput(const OpContext<DeviceTensor> *const context) {
const auto &data_iter = input_parameter_data_.find(context->sequential_num_);
if (data_iter == input_parameter_data_.end()) {
MS_LOG(ERROR) << "Invalid input for actor:" << GetAID();
return;
}
for (auto &one_stack : data_iter->second) {
if (one_stack.second.empty()) {
MS_LOG(ERROR) << "Input index:" << one_stack.first << " is null in actor:" << GetAID();
return;
}
one_stack.second.pop();
}
@ -246,11 +255,13 @@ void StackActor::EraseInput(const OpContext<DeviceTensor> *const context) {
const auto &partial_iter = input_parameter_partial_.find(context->sequential_num_);
if (partial_iter == input_parameter_partial_.end()) {
MS_LOG(ERROR) << "Invalid input for actor:" << GetAID();
return;
}
for (auto &one_stack : partial_iter->second) {
if (one_stack.second.empty()) {
MS_LOG(ERROR) << "Input index:" << one_stack.first << " is null in actor:" << GetAID();
return;
}
one_stack.second.pop();
}

View File

@ -35,7 +35,7 @@ void SwitchActor::Init() {
// Init output data.
for (const auto &data_arrow : output_data_arrows_) {
if (data_arrow->from_output_index_ != 0) {
MS_LOG(ERROR) << "Invalid from index:" << data_arrow->from_output_index_ << " for actor:" << GetAID();
MS_LOG(EXCEPTION) << "Invalid from index:" << data_arrow->from_output_index_ << " for actor:" << GetAID().Name();
}
auto data = std::make_unique<OpData<DeviceTensor>>(data_arrow->to_op_id_, nullptr, data_arrow->to_input_index_);
MS_EXCEPTION_IF_NULL(data);