From bf793cd1a441809828dc4de0b974005879285caf Mon Sep 17 00:00:00 2001 From: ZPaC Date: Mon, 21 Dec 2020 19:09:02 +0800 Subject: [PATCH] Return false if Recv node is not found in gpu_stream_assign.cc --- mindspore/ccsrc/runtime/device/gpu/gpu_stream_assign.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_stream_assign.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_stream_assign.cc index de69c73d52a..cafc3af945e 100644 --- a/mindspore/ccsrc/runtime/device/gpu/gpu_stream_assign.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_stream_assign.cc @@ -83,7 +83,10 @@ bool FindAllReduceStreamSwitchPos(const std::shared_ptr &k std::vector::iterator mock_recv_node_iter = FindRecvNodePos(iter, iter_end, *iter, kAllReduceStreamSwitch); if (mock_recv_node_iter == iter_end) { + // Each AllReduce must have its corresponding node which takes AllReduce as an input to synchronize stream, + // otherwise consider FindAllReduceStreamSwitchPos as failed. MS_LOG(INFO) << "Can't find recv node place after AllReduce node."; + return false; } else if (AnfAlgo::GetCNodeName(*mock_recv_node_iter) != kAllReduceOpName) { SendRecvPair pair2 = {kAllReduceStreamSwitch, *iter, *mock_recv_node_iter, IntToSize(iter - iter_begin + 1), IntToSize(mock_recv_node_iter - iter_begin)};