diff --git a/mindspore/ccsrc/backend/session/gpu_session.cc b/mindspore/ccsrc/backend/session/gpu_session.cc index d82b652a422..0b19137fb64 100644 --- a/mindspore/ccsrc/backend/session/gpu_session.cc +++ b/mindspore/ccsrc/backend/session/gpu_session.cc @@ -481,7 +481,9 @@ void GPUSession::UpdateOutputTensors(const VectorRef *outputs, // When the device address of graph cnode output is set in tensor, the graph output need be set new device // address, to avoid that the device address context of tensor be rewritten in the next step or next loop. - if (node->isa()) { + // But scenarios where memory is allocated only once must be skipped, because the memory is not allocated again in the next step: + // 1. Non-cnode outputs. 2. Communication kernels. + if (node->isa() && !AnfAlgo::IsCommunicationOp(node)) { auto new_address = std::make_shared(nullptr, address->GetSize()); AnfAlgo::SetOutputAddr(new_address, output_index, node.get()); if (context::GraphKernelFlags::GetInstance().IsEnableGraphKernel()) { diff --git a/mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.h b/mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.h index 9c248d6f329..f6cbde47da4 100644 --- a/mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.h +++ b/mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.h @@ -25,7 +25,6 @@ #include "ir/anf.h" #include "ir/dtype.h" #include "utils/utils.h" -#include "frontend/operator/ops.h" #include "backend/kernel_compiler/kernel.h" #include "backend/session/kernel_graph.h"