Fix unified runtime bug of host device and graph kernel

limingqi107 2021-07-29 11:02:28 +08:00
parent 8257b469f5
commit 86c69d116b
4 changed files with 13 additions and 8 deletions


@@ -1216,7 +1216,6 @@ void KernelGraph::UpdateGraphOutputMap(const std::vector<AnfWithOutIndex> &old_o
     if (old_output == new_output) {
       continue;
     }
     // Update the graph output map.
     if (graph_output_to_front_node_map_.count(old_output) > 0) {
       MS_LOG(INFO) << "Replace backend output node " << old_output.first->fullname_with_scope() << " with index "
@@ -1226,11 +1225,6 @@ void KernelGraph::UpdateGraphOutputMap(const std::vector<AnfWithOutIndex> &old_o
       graph_output_to_front_node_map_.erase(old_output);
     }
-    // Update the internal output map.
-    if (IsInternalOutput(old_output.first, old_output.second)) {
-      ReplaceInternalOutput(old_output.first, new_output.first, old_output.second, new_output.second);
-    }
     if (old_output.first == new_output.first) {
       continue;
     }
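The two hunks above drop the duplicated internal-output update and keep only the graph-output rekeying. That rekeying is a plain rekey-then-erase pattern on an ordered map. A minimal, self-contained sketch of the pattern, with a (name, index) pair standing in for the real AnfWithOutIndex key (the stand-in types are assumptions, not the real MindSpore definitions):

#include <cstddef>
#include <iostream>
#include <map>
#include <string>
#include <utility>

// Stand-in for AnfWithOutIndex: a node name plus an output index.
using AnfWithOutIndex = std::pair<std::string, size_t>;

int main() {
  std::map<AnfWithOutIndex, std::string> graph_output_to_front_node_map{{{"old_kernel", 0}, "front_node"}};
  AnfWithOutIndex old_output{"old_kernel", 0};
  AnfWithOutIndex new_output{"new_kernel", 0};

  // Same pattern as the diff: copy the mapped value under the new key,
  // then erase the old key, so the front node now resolves via new_output.
  if (graph_output_to_front_node_map.count(old_output) > 0) {
    graph_output_to_front_node_map[new_output] = graph_output_to_front_node_map[old_output];
    graph_output_to_front_node_map.erase(old_output);
  }
  std::cout << graph_output_to_front_node_map.begin()->first.first << std::endl;  // prints new_kernel
  return 0;
}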


@@ -32,7 +32,7 @@ void ComputeThreadNums(size_t *actor_thread_num, size_t *OMP_thread_num) {
   auto context_ptr = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(context_ptr);
   // The pyNative mode is the step execution strategy, so only need the kActorThreadMinNum.
-  if (context_ptr->get_param<bool>(MS_CTX_SAVE_GRAPHS_FLAG) == kPynativeMode) {
+  if (context_ptr->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode) {
     *actor_thread_num = kActorThreadMinNum;
   } else {
     *actor_thread_num = cpu_core_num < kActorThreadMinNum ? kActorThreadMinNum : cpu_core_num;
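The one-line fix above is the core of the change: the old code read the boolean save-graphs flag and compared it against kPynativeMode, so the mode could be misclassified whenever the flag's integer value happened to equal the mode constant; the execution mode itself is an int parameter. A minimal sketch of the corrected selection logic (the constant values here are assumptions for illustration):

#include <algorithm>
#include <cstddef>

constexpr int kPynativeMode = 1;          // assumed value of the mode enum
constexpr size_t kActorThreadMinNum = 2;  // assumed minimum thread count

// PyNative mode executes step by step, so the minimum number of actor
// threads suffices; graph mode scales with the available cores.
size_t SelectActorThreadNum(int execution_mode, size_t cpu_core_num) {
  if (execution_mode == kPynativeMode) {
    return kActorThreadMinNum;
  }
  // Equivalent to the diff's ternary: never go below the minimum.
  return std::max(cpu_core_num, kActorThreadMinNum);
}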
@@ -117,6 +117,10 @@ bool IsGatherActor(const AnfNodePtr &front_node,
 bool Copy(DeviceTensor *dst_device_tensor, const DeviceTensor *src_device_tensor) {
   MS_EXCEPTION_IF_NULL(dst_device_tensor);
   MS_EXCEPTION_IF_NULL(src_device_tensor);
+  if (src_device_tensor->GetSize() != dst_device_tensor->GetSize()) {
+    MS_LOG(WARNING) << " Copy size is not equal, input size:" << src_device_tensor->GetSize()
+                    << ", output size:" << dst_device_tensor->GetSize();
+  }
   // Exist the size alignment in some device, so get the min device size.
   size_t copy_size = std::min(src_device_tensor->GetSize(), dst_device_tensor->GetSize());
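Clamping the copy to the smaller of the two sizes is what makes the new warning non-fatal: some devices align allocation sizes upward, so the two endpoints can legitimately disagree. A standalone sketch of the clamped copy, with raw buffers standing in for DeviceTensor (names are illustrative):

#include <algorithm>
#include <cstddef>
#include <cstring>

// Copy at most min(src_size, dst_size) bytes so that a size mismatch
// caused by device-side alignment never reads or writes out of bounds.
void ClampedCopy(void *dst, size_t dst_size, const void *src, size_t src_size) {
  size_t copy_size = std::min(src_size, dst_size);
  std::memcpy(dst, src, copy_size);
}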


@@ -72,6 +72,13 @@ void CopyActor::SendMemoryFreeReq(OpContext<DeviceTensor> *context) {
 void CopyActor::OnMemoryAllocFinish(OpContext<DeviceTensor> *context) {
   MS_EXCEPTION_IF_NULL(context);
   MS_EXCEPTION_IF_NULL(output_device_tensor_[0]);
+  MS_EXCEPTION_IF_NULL(input_device_tensor_[0]);
+  if (input_device_tensor_[0]->GetSize() != output_device_tensor_[0]->GetSize()) {
+    MS_LOG(WARNING) << GetAID().Name() << " copy size is not equal, input size:" << input_device_tensor_[0]->GetSize()
+                    << ", output size:" << output_device_tensor_[0]->GetSize();
+  }
   if (!Copy(output_device_tensor_[0], input_device_tensor_[0])) {
     std::string error_info = "Copy device tensor failed: " + GetAID().Name();
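The actor-side check mirrors the one in Copy: a size mismatch is logged as a warning rather than treated as an error, because alignment padding can make the sizes differ while the clamped copy stays safe; only a failed copy is escalated. A sketch of that design choice (the function and names are illustrative, not the real actor API):

#include <cstddef>
#include <cstdio>

// Report a mismatch but let the (clamped) copy proceed; the caller
// escalates only an actual copy failure to an error.
void WarnIfSizeMismatch(size_t input_size, size_t output_size, const char *actor_name) {
  if (input_size != output_size) {
    std::fprintf(stderr, "%s copy size is not equal, input size:%zu, output size:%zu\n", actor_name, input_size,
                 output_size);
  }
}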


@@ -18,7 +18,7 @@
 #include "thread/core_affinity.h"
 namespace mindspore {
-constexpr size_t MAX_READY_ACTOR_NR = 1024;
+constexpr size_t MAX_READY_ACTOR_NR = 4096;
 void ActorWorker::CreateThread(ActorThreadPool *pool) {
   THREAD_RETURN_IF_NULL(pool);
   pool_ = pool;
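Raising MAX_READY_ACTOR_NR from 1024 to 4096 widens the bound on how many actors may be queued as ready at once; large fused graphs can exceed the old bound. A sketch of why such a constant matters, assuming the ready list behaves like a fixed-capacity ring queue (the real ActorThreadPool structure may differ):

#include <array>
#include <cstddef>

template <typename T, size_t Capacity>
class ReadyQueue {
 public:
  // Fails when the queue is full: with too small a Capacity, a burst of
  // ready actors from a big graph cannot all be enqueued.
  bool Enqueue(const T &item) {
    if (size_ == Capacity) {
      return false;
    }
    buffer_[(head_ + size_) % Capacity] = item;
    ++size_;
    return true;
  }

  bool Dequeue(T *item) {
    if (size_ == 0) {
      return false;
    }
    *item = buffer_[head_];
    head_ = (head_ + 1) % Capacity;
    --size_;
    return true;
  }

 private:
  std::array<T, Capacity> buffer_{};
  size_t head_{0};
  size_t size_{0};
};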