forked from mindspore-Ecosystem/mindspore
Fix graph output address setting in one-time memory allocation scenarios
This commit is contained in:
parent
db92a4e7b8
commit
179c677fef
|
@ -481,7 +481,9 @@ void GPUSession::UpdateOutputTensors(const VectorRef *outputs,
|
|||
|
||||
// When the device address of a graph CNode output is set in a tensor, the graph output needs to be given a new device
|
||||
// address, so that the content at the tensor's device address is not overwritten in the next step or next loop.
|
||||
if (node->isa<CNode>()) {
|
||||
// However, one-time memory allocation scenarios must be skipped, because the memory is not allocated again in the next step:
|
||||
// 1. Non-CNode. 2. Communication kernel.
|
||||
if (node->isa<CNode>() && !AnfAlgo::IsCommunicationOp(node)) {
|
||||
auto new_address = std::make_shared<device::gpu::GPUDeviceAddress>(nullptr, address->GetSize());
|
||||
AnfAlgo::SetOutputAddr(new_address, output_index, node.get());
|
||||
if (context::GraphKernelFlags::GetInstance().IsEnableGraphKernel()) {
|
||||
|
|
|
@ -25,7 +25,6 @@
|
|||
#include "ir/anf.h"
|
||||
#include "ir/dtype.h"
|
||||
#include "utils/utils.h"
|
||||
#include "frontend/operator/ops.h"
|
||||
#include "backend/kernel_compiler/kernel.h"
|
||||
#include "backend/session/kernel_graph.h"
|
||||
|
||||
|
|
Loading…
Reference in New Issue