diff --git a/mindspore/lite/src/delegate/npu/npu_graph.cc b/mindspore/lite/src/delegate/npu/npu_graph.cc
index 1e0e82e6f28..4bbe439321d 100644
--- a/mindspore/lite/src/delegate/npu/npu_graph.cc
+++ b/mindspore/lite/src/delegate/npu/npu_graph.cc
@@ -70,38 +70,35 @@ void NPUGraph::set_output(mindspore::MSTensor out_tensor, int index) {
 int NPUGraph::Init() {
   all_kernels_.clear();
   std::map<const NPUOp *, bool> is_visited;
+  std::map<const NPUOp *, bool> is_searched;
+  std::queue<NPUOp *> candidate_in_ops;
+  std::queue<NPUOp *> valid_in_ops;
+  // Initialization
   for (auto op : npu_ops_) {
     is_visited[op] = false;
+    is_searched[op] = false;
+    if (op->in_ops().empty()) {
+      candidate_in_ops.push(op);
+    }
   }
-
-  while (npu_ops_.size() > 0) {
-    auto head_op_iter = std::find_if(npu_ops_.begin(), npu_ops_.end(), [&](const NPUOp *op) {
-      if (is_visited[op]) {
-        return false;
-      }
-      return true;
-    });
-    if (head_op_iter == npu_ops_.end()) {
+  while (!candidate_in_ops.empty()) {
+    // 1. Find all input ops other than transpose; transpose ops are handled independently.
+    auto ret = FindValidSubgraphInOps(&valid_in_ops, &candidate_in_ops, &is_visited);
+    if (ret != RET_OK) {
+      MS_LOG(DEBUG) << "Failed to find valid input ops or handle transpose ops.";
+      return RET_ERROR;
+    }
+    if (valid_in_ops.empty()) {
+      MS_LOG(INFO) << "Cannot find input ops other than transpose.";
       break;
     }
-    auto head_op = *head_op_iter;
-    if (head_op->type() != schema::PrimitiveType_Transpose) {
-      // If npu_kernel does not equal nullptr, this kernel can be supported by delegate
-      auto npu_ops = FindSubgraphOps(head_op, &is_visited);
-      auto subgraph_kernel = CreateNPUSubgraphKernel(npu_ops);
-      if (subgraph_kernel == nullptr) {
-        MS_LOG(DEBUG) << "Create NPU subgraph kernel failed.";
-        return RET_ERROR;
-      }
-      all_kernels_.push_back(subgraph_kernel);
-    } else {
-      auto transpose_kernel = CreateNPUTransposeKernel(head_op);
-      if (transpose_kernel == nullptr) {
-        MS_LOG(DEBUG) << "New NPU transpose kernel failed.";
-        return RET_ERROR;
-      }
-      all_kernels_.push_back(transpose_kernel);
-      is_visited[head_op] = true;
+    // 2. Find all ready ops reachable from the valid input ops; they may not all belong to the same subgraph.
+    auto ready_ops = FindReadySubgraphOps(valid_in_ops, &candidate_in_ops, &is_visited);
+    // 3. Create subgraph(s). Connected input ops are built into the same subgraph.
+    ret = CreateSubgraphFromReadyOps(&valid_in_ops, ready_ops, &is_searched);
+    if (ret != RET_OK) {
+      MS_LOG(DEBUG) << "Failed to create subgraph(s) from ready ops.";
+      return RET_ERROR;
+    }
   }
   return RET_OK;
 }
@@ -141,32 +138,128 @@ int NPUGraph::FindPreNextOps() {
   return RET_OK;
 }
 
-std::vector<NPUOp *> NPUGraph::FindSubgraphOps(NPUOp *head_op, std::map<const NPUOp *, bool> *is_visited) {
+int NPUGraph::FindValidSubgraphInOps(std::queue<NPUOp *> *valid_in_ops, std::queue<NPUOp *> *candidate_in_ops,
+                                     std::map<const NPUOp *, bool> *is_visited) {
+  while (!candidate_in_ops->empty()) {
+    auto cur_op = candidate_in_ops->front();
+    candidate_in_ops->pop();
+    if ((*is_visited)[cur_op]) {
+      continue;
+    }
+    if (cur_op->type() == schema::PrimitiveType_Transpose) {
+      auto transpose_kernel = CreateNPUTransposeKernel(cur_op);
+      if (transpose_kernel == nullptr) {
+        MS_LOG(DEBUG) << "New NPU transpose kernel failed.";
+        return RET_ERROR;
+      }
+      all_kernels_.push_back(transpose_kernel);
+      (*is_visited)[cur_op] = true;
+      for (auto out_op : cur_op->out_ops()) {
+        if (out_op->type() == schema::PrimitiveType_Transpose) {
+          candidate_in_ops->push(out_op);
+        } else {
+          auto input_ready = std::all_of(out_op->in_ops().begin(), out_op->in_ops().end(),
+                                         [&](NPUOp *in_op) { return (*is_visited)[in_op] == true; });
+          if (input_ready) {
+            valid_in_ops->push(out_op);
+          }
+        }
+      }
+    } else {
+      valid_in_ops->push(cur_op);
+    }
+  }
+  return RET_OK;
+}
+
+std::vector<NPUOp *> NPUGraph::FindReadySubgraphOps(std::queue<NPUOp *> op_queue,
+                                                    std::queue<NPUOp *> *next_candidate_ops,
+                                                    std::map<const NPUOp *, bool> *is_visited) {
   std::vector<NPUOp *> subgraph_ops;
-  subgraph_ops.push_back(head_op);
-  (*is_visited)[head_op] = true;
-  std::queue<NPUOp *> op_queue;
-  op_queue.emplace(head_op);
   while (!op_queue.empty()) {
     auto cur_op = op_queue.front();
     op_queue.pop();
+    if ((*is_visited)[cur_op]) {
+      continue;
+    }
+    subgraph_ops.push_back(cur_op);
+    (*is_visited)[cur_op] = true;
     auto out_ops = cur_op->out_ops();
     for (auto out_op : out_ops) {
-      if ((*is_visited)[out_op] == true) {
+      if ((*is_visited)[out_op]) {
         continue;
       }
       auto input_ready = std::all_of(out_op->in_ops().begin(), out_op->in_ops().end(),
                                      [&](NPUOp *in_op) { return (*is_visited)[in_op] == true; });
       if (input_ready && out_op->type() != schema::PrimitiveType_Transpose) {
-        subgraph_ops.push_back(out_op);
-        (*is_visited)[out_op] = true;
         op_queue.push(out_op);
+      } else {
+        next_candidate_ops->push(out_op);
       }
     }
   }
   return subgraph_ops;
 }
 
+void FindConnectedOps(NPUOp *head_op, std::vector<NPUOp *> ready_ops, std::vector<NPUOp *> *connected_ops,
+                      std::map<const NPUOp *, bool> *is_searched) {
+  std::queue<NPUOp *> bfs_ops;
+  bfs_ops.push(head_op);
+  while (!bfs_ops.empty()) {
+    auto cur_op = bfs_ops.front();
+    bfs_ops.pop();
+    if ((*is_searched)[cur_op]) {
+      continue;
+    }
+    for (auto in_op : cur_op->in_ops()) {
+      if (std::find(ready_ops.begin(), ready_ops.end(), in_op) == ready_ops.end() || (*is_searched)[in_op]) {
+        continue;
+      }
+      bfs_ops.push(in_op);
+    }
+    for (auto out_op : cur_op->out_ops()) {
+      if (std::find(ready_ops.begin(), ready_ops.end(), out_op) == ready_ops.end() || (*is_searched)[out_op]) {
+        continue;
+      }
+      bfs_ops.push(out_op);
+    }
+    (*is_searched)[cur_op] = true;
+    connected_ops->push_back(cur_op);
+  }
+  return;
+}
+
+int NPUGraph::CreateSubgraphFromReadyOps(std::queue<NPUOp *> *valid_in_ops, std::vector<NPUOp *> ready_ops,
+                                         std::map<const NPUOp *, bool> *is_searched) {
+  while (!valid_in_ops->empty()) {
+    std::vector<NPUOp *> connected_ops;
+    auto op = valid_in_ops->front();
+    valid_in_ops->pop();
+    if ((*is_searched)[op]) {
+      continue;
+    }
+    if (valid_in_ops->empty()) {
+      // if the current input op is the only one left, there is no need to confirm connectivity
+      for (auto ready_op : ready_ops) {
+        if (!(*is_searched)[ready_op]) {
+          connected_ops.push_back(ready_op);
+          (*is_searched)[ready_op] = true;
+        }
+      }
+    } else {
+      // use BFS to find the ready ops connected to the current input op
+      FindConnectedOps(op, ready_ops, &connected_ops, is_searched);
+    }
+    auto subgraph_kernel = CreateNPUSubgraphKernel(connected_ops);
+    if (subgraph_kernel == nullptr) {
+      MS_LOG(DEBUG) << "Create NPU subgraph kernel failed.";
+      return RET_ERROR;
+    }
+    all_kernels_.push_back(subgraph_kernel);
+  }
+  return RET_OK;
+}
+
 kernel::Kernel *NPUGraph::CreateNPUSubgraphKernel(std::vector<NPUOp *> npu_ops) {
   auto subgraph = new (std::nothrow) NPUSubGraph(npu_ops, npu_manager_);
   if (subgraph == nullptr) {
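The partition strategy above is easiest to see on a toy graph. Below is a minimal, self-contained sketch of the grouping idea that CreateSubgraphFromReadyOps relies on: a wave of ready ops is split into connected components, so disjoint branches land in separate NPU subgraphs. The Op struct and all names here are illustrative stand-ins, not the delegate's real NPUOp API.

#include <algorithm>
#include <iostream>
#include <map>
#include <queue>
#include <string>
#include <vector>

struct Op {
  std::string name;
  std::vector<Op *> in_ops;
  std::vector<Op *> out_ops;
};

// BFS over undirected connectivity, restricted to the ready set.
std::vector<Op *> ConnectedOps(Op *head, const std::vector<Op *> &ready, std::map<Op *, bool> *searched) {
  std::vector<Op *> component;
  std::queue<Op *> bfs;
  bfs.push(head);
  while (!bfs.empty()) {
    Op *cur = bfs.front();
    bfs.pop();
    if ((*searched)[cur]) continue;
    (*searched)[cur] = true;
    component.push_back(cur);
    for (Op *nb : cur->in_ops)
      if (std::find(ready.begin(), ready.end(), nb) != ready.end() && !(*searched)[nb]) bfs.push(nb);
    for (Op *nb : cur->out_ops)
      if (std::find(ready.begin(), ready.end(), nb) != ready.end() && !(*searched)[nb]) bfs.push(nb);
  }
  return component;
}

int main() {
  // Two disconnected chains: a -> b and c -> d. The old head-op walk handled
  // them one head at a time; the new flow sees both input ops in one wave and
  // still emits two subgraphs because the chains are not connected.
  Op a{"a"}, b{"b"}, c{"c"}, d{"d"};
  a.out_ops = {&b};  b.in_ops = {&a};
  c.out_ops = {&d};  d.in_ops = {&c};
  std::vector<Op *> ready = {&a, &b, &c, &d};
  std::map<Op *, bool> searched;
  std::queue<Op *> wave_in_ops;  // input ops of this wave
  wave_in_ops.push(&a);
  wave_in_ops.push(&c);
  while (!wave_in_ops.empty()) {
    Op *head = wave_in_ops.front();
    wave_in_ops.pop();
    if (searched[head]) continue;
    auto component = ConnectedOps(head, ready, &searched);
    std::cout << "subgraph:";
    for (Op *op : component) std::cout << ' ' << op->name;
    std::cout << '\n';
  }
  return 0;
}

Running the sketch prints "subgraph: a b" and "subgraph: c d", i.e. one kernel per connected component.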
diff --git a/mindspore/lite/src/delegate/npu/npu_graph.h b/mindspore/lite/src/delegate/npu/npu_graph.h
index 9ffd1d6e1f9..4cba464d5fc 100644
--- a/mindspore/lite/src/delegate/npu/npu_graph.h
+++ b/mindspore/lite/src/delegate/npu/npu_graph.h
@@ -18,6 +18,7 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_NPU_NPU_GRAPH_H_
 
 #include <vector>
+#include <queue>
 #include <map>
 #include <utility>
 #include "include/api/kernel.h"
@@ -59,7 +60,14 @@ class NPUGraph : public kernel::Kernel {
 
   std::vector<NPUOp *> FindNextOps(NPUOp *cur_op);
 
-  std::vector<NPUOp *> FindSubgraphOps(NPUOp *head_op, std::map<const NPUOp *, bool> *is_visited);
+  int FindValidSubgraphInOps(std::queue<NPUOp *> *valid_in_ops, std::queue<NPUOp *> *candidate_in_ops,
+                             std::map<const NPUOp *, bool> *is_visited);
+
+  std::vector<NPUOp *> FindReadySubgraphOps(std::queue<NPUOp *> op_queue, std::queue<NPUOp *> *next_candidate_ops,
+                                            std::map<const NPUOp *, bool> *is_visited);
+
+  int CreateSubgraphFromReadyOps(std::queue<NPUOp *> *valid_in_ops, std::vector<NPUOp *> ready_ops,
+                                 std::map<const NPUOp *, bool> *is_searched);
 
   kernel::Kernel *CreateNPUSubgraphKernel(std::vector<NPUOp *> ops);
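Both FindValidSubgraphInOps and FindReadySubgraphOps gate expansion on the same std::all_of readiness test. A compact sketch of that idiom, using a stand-in Op type rather than the real NPUOp:

#include <algorithm>
#include <map>
#include <vector>

struct Op {
  std::vector<Op *> in_ops;
};

// An op may join the current wave only when every producer is already visited.
bool InputReady(Op *op, std::map<const Op *, bool> *visited) {
  return std::all_of(op->in_ops.begin(), op->in_ops.end(),
                     [&](Op *in) { return (*visited)[in]; });
}

int main() {
  Op producer;
  Op consumer{{&producer}};
  std::map<const Op *, bool> visited;
  visited[&producer] = true;  // producer already scheduled
  return InputReady(&consumer, &visited) ? 0 : 1;  // exits 0: consumer is ready
}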
diff --git a/mindspore/lite/src/delegate/npu/npu_subgraph.cc b/mindspore/lite/src/delegate/npu/npu_subgraph.cc
index 58f64bf3e05..b6b133101bb 100644
--- a/mindspore/lite/src/delegate/npu/npu_subgraph.cc
+++ b/mindspore/lite/src/delegate/npu/npu_subgraph.cc
@@ -160,7 +160,7 @@ int NPUSubGraph::BuildNPUInputOp() {
     for (int i = 0; i < op->inputs().size(); ++i) {
       auto in_tensor = op->inputs()[i];
       if (IsSubGraphInputTensor(in_tensor)) {
-        auto tensor_name = op->name() + "_" + std::to_string(count++);
+        auto tensor_name = "Input_" + std::to_string(count++) + '_' + op->name();
         hiai::op::Data *data;
         data = ConverterToNPUData(in_tensor, tensor_name);
         subgraph_input_ops_.push_back(*data);
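The rename above moves the running index to the front, so every subgraph input tensor shares a common "Input_<n>_" prefix instead of being keyed by op name first. A quick before/after sketch; the op name and counter value are made up:

#include <iostream>
#include <string>

int main() {
  std::string op_name = "Conv2D_1/relu";  // hypothetical op name
  int count = 0;
  auto old_name = op_name + "_" + std::to_string(count);              // "Conv2D_1/relu_0"
  auto new_name = "Input_" + std::to_string(count) + '_' + op_name;   // "Input_0_Conv2D_1/relu"
  std::cout << old_name << " -> " << new_name << '\n';
  return 0;
}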
diff --git a/mindspore/lite/test/config/models_caffe.cfg b/mindspore/lite/test/config/models_caffe.cfg
index 6c173248e7d..a22cb5a1f8e 100644
--- a/mindspore/lite/test/config/models_caffe.cfg
+++ b/mindspore/lite/test/config/models_caffe.cfg
@@ -125,3 +125,4 @@ ml_Heatmap_depth_240180;2
 ml_Heatmap_depth_180240;2
 ml_video_edit_person_divison_video;2
 ml_video_edit_hair_dyeing_segmodel_v2
+ml_video_edit_hairline_segmentation;3
diff --git a/mindspore/lite/test/config/models_caffe_fp16.cfg b/mindspore/lite/test/config/models_caffe_fp16.cfg
index 3c7bd66649c..208fd880576 100644
--- a/mindspore/lite/test/config/models_caffe_fp16.cfg
+++ b/mindspore/lite/test/config/models_caffe_fp16.cfg
@@ -134,3 +134,4 @@ hdc_ocr_recog_horizontal 0.5
 ml_Heatmap_depth_240180;2 10
 ml_Heatmap_depth_180240;2 7
 ml_video_edit_hair_dyeing_segmodel_v2 1
+ml_video_edit_hairline_segmentation;3 1.5
\ No newline at end of file
diff --git a/mindspore/lite/test/config/models_npu.cfg b/mindspore/lite/test/config/models_npu.cfg
index a42b907fc7e..579111d2ba2 100644
--- a/mindspore/lite/test/config/models_npu.cfg
+++ b/mindspore/lite/test/config/models_npu.cfg
@@ -86,3 +86,4 @@ ml_video_edit_art_generate_20210513.onnx 0.5
 ml_video_edit_art_transfer_20210513.onnx;3 0.5
 ml_video_edit_hair_dyeing_segmodel_v2 0.5
 ml_video_edit_makeup_mobilenetv203.onnx 2
+ml_video_edit_hairline_segmentation;3 0.5
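On the config additions: judging only from the neighboring entries, each line appears to follow the pattern model_name[;input_num] [accuracy_limit], so the three new lines

ml_video_edit_hairline_segmentation;3
ml_video_edit_hairline_segmentation;3 1.5
ml_video_edit_hairline_segmentation;3 0.5

register the new model as a 3-input network in models_caffe.cfg, with an fp16 accuracy tolerance of 1.5 in models_caffe_fp16.cfg and an NPU tolerance of 0.5 in models_npu.cfg. The field meanings are inferred from the surrounding lines, not documented in this patch.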