Fix graph output address setting in one-time memory allocation scenarios

This commit is contained in:
limingqi107 2021-04-26 15:03:51 +08:00
parent db92a4e7b8
commit 179c677fef
2 changed files with 3 additions and 2 deletions

View File

@ -481,7 +481,9 @@ void GPUSession::UpdateOutputTensors(const VectorRef *outputs,
// When the device address of a graph cnode output is set in a tensor, the graph output needs to be given a new
// device address, to prevent the device address context of the tensor from being rewritten in the next step or loop.
if (node->isa<CNode>()) {
// However, one-time memory allocation scenarios must be skipped, because the memory is not allocated again in the
// next step: 1. Non-cnode 2. Communication kernel.
if (node->isa<CNode>() && !AnfAlgo::IsCommunicationOp(node)) {
auto new_address = std::make_shared<device::gpu::GPUDeviceAddress>(nullptr, address->GetSize());
AnfAlgo::SetOutputAddr(new_address, output_index, node.get());
if (context::GraphKernelFlags::GetInstance().IsEnableGraphKernel()) {

View File

@ -25,7 +25,6 @@
#include "ir/anf.h"
#include "ir/dtype.h"
#include "utils/utils.h"
#include "frontend/operator/ops.h"
#include "backend/kernel_compiler/kernel.h"
#include "backend/session/kernel_graph.h"