!20558 [MS][LITE] optimize the npu subgraph split procedure

Merge pull request !20558 from XianglongZeng/myms_new_2
This commit is contained in:
i-robot 2021-07-23 03:35:54 +00:00 committed by Gitee
commit 4d60b57095
6 changed files with 141 additions and 37 deletions

View File

@ -70,38 +70,35 @@ void NPUGraph::set_output(mindspore::MSTensor out_tensor, int index) {
int NPUGraph::Init() { int NPUGraph::Init() {
all_kernels_.clear(); all_kernels_.clear();
std::map<const NPUOp *, bool> is_visited; std::map<const NPUOp *, bool> is_visited;
std::map<const NPUOp *, bool> is_searched;
std::queue<NPUOp *> candidate_in_ops;
std::queue<NPUOp *> valid_in_ops;
// Initialization
for (auto op : npu_ops_) { for (auto op : npu_ops_) {
is_visited[op] = false; is_visited[op] = false;
is_searched[op] = false;
if (op->in_ops().empty()) {
candidate_in_ops.push(op);
}
} }
while (!candidate_in_ops.empty()) {
while (npu_ops_.size() > 0) { // 1. Find out all input ops except transpose, and handle transpose ops independently.
auto head_op_iter = std::find_if(npu_ops_.begin(), npu_ops_.end(), [&](const NPUOp *op) { auto ret = FindValidSubgraphInOps(&valid_in_ops, &candidate_in_ops, &is_visited);
if (is_visited[op]) { if (ret != RET_OK) {
return false; MS_LOG(DEBUG) << "Fail to find valid input ops or handle transpose ops.";
} return RET_ERROR;
return true; }
}); if (valid_in_ops.empty()) {
if (head_op_iter == npu_ops_.end()) { MS_LOG(INFO) << "Can not find input ops except transpose.";
break; break;
} }
auto head_op = *head_op_iter; // 2. Find out all ready ops based on valid input ops, but these ops maybe not belong to the same subgraph.
if (head_op->type() != schema::PrimitiveType_Transpose) { auto ready_ops = FindReadySubgraphOps(valid_in_ops, &candidate_in_ops, &is_visited);
// If npu_kernel does not equal nullptr, this kernel can be supported by delegate // 3. Create subgraph(s). Input ops with connection will be built into a same subgraph.
auto npu_ops = FindSubgraphOps(head_op, &is_visited); ret = CreateSubgraphFromReadyOps(&valid_in_ops, ready_ops, &is_searched);
auto subgraph_kernel = CreateNPUSubgraphKernel(npu_ops); if (ret != RET_OK) {
if (subgraph_kernel == nullptr) { MS_LOG(DEBUG) << "Fail to create subgraph(s) from ready ops.";
MS_LOG(DEBUG) << "Create NPU subgraph kernel failed."; return RET_ERROR;
return RET_ERROR;
}
all_kernels_.push_back(subgraph_kernel);
} else {
auto transpose_kernel = CreateNPUTransposeKernel(head_op);
if (transpose_kernel == nullptr) {
MS_LOG(DEBUG) << "New NPU transpose kernel failed.";
return RET_ERROR;
}
all_kernels_.push_back(transpose_kernel);
is_visited[head_op] = true;
} }
} }
return RET_OK; return RET_OK;
@ -141,32 +138,128 @@ int NPUGraph::FindPreNextOps() {
return RET_OK; return RET_OK;
} }
std::vector<NPUOp *> NPUGraph::FindSubgraphOps(NPUOp *head_op, std::map<const NPUOp *, bool> *is_visited) { int NPUGraph::FindValidSubgraphInOps(std::queue<NPUOp *> *valid_in_ops, std::queue<NPUOp *> *candidate_in_ops,
std::map<const NPUOp *, bool> *is_visited) {
while (!candidate_in_ops->empty()) {
auto cur_op = candidate_in_ops->front();
candidate_in_ops->pop();
if ((*is_visited)[cur_op]) {
continue;
}
if (cur_op->type() == schema::PrimitiveType_Transpose) {
auto transpose_kernel = CreateNPUTransposeKernel(cur_op);
if (transpose_kernel == nullptr) {
MS_LOG(DEBUG) << "New NPU transpose kernel failed.";
return RET_ERROR;
}
all_kernels_.push_back(transpose_kernel);
(*is_visited)[cur_op] = true;
for (auto out_op : cur_op->out_ops()) {
if (out_op->type() == schema::PrimitiveType_Transpose) {
candidate_in_ops->push(out_op);
} else {
auto input_ready = std::all_of(out_op->in_ops().begin(), out_op->in_ops().end(),
[&](NPUOp *in_op) { return (*is_visited)[in_op] == true; });
if (input_ready) {
valid_in_ops->push(out_op);
}
}
}
} else {
valid_in_ops->push(cur_op);
}
}
return RET_OK;
}
std::vector<NPUOp *> NPUGraph::FindReadySubgraphOps(std::queue<NPUOp *> op_queue,
std::queue<NPUOp *> *next_candidate_ops,
std::map<const NPUOp *, bool> *is_visited) {
std::vector<NPUOp *> subgraph_ops; std::vector<NPUOp *> subgraph_ops;
subgraph_ops.push_back(head_op);
(*is_visited)[head_op] = true;
std::queue<NPUOp *> op_queue;
op_queue.emplace(head_op);
while (!op_queue.empty()) { while (!op_queue.empty()) {
auto cur_op = op_queue.front(); auto cur_op = op_queue.front();
op_queue.pop(); op_queue.pop();
if ((*is_visited)[cur_op]) {
continue;
}
subgraph_ops.push_back(cur_op);
(*is_visited)[cur_op] = true;
auto out_ops = cur_op->out_ops(); auto out_ops = cur_op->out_ops();
for (auto out_op : out_ops) { for (auto out_op : out_ops) {
if ((*is_visited)[out_op] == true) { if ((*is_visited)[out_op]) {
continue; continue;
} }
auto input_ready = std::all_of(out_op->in_ops().begin(), out_op->in_ops().end(), auto input_ready = std::all_of(out_op->in_ops().begin(), out_op->in_ops().end(),
[&](NPUOp *in_op) { return (*is_visited)[in_op] == true; }); [&](NPUOp *in_op) { return (*is_visited)[in_op] == true; });
if (input_ready && out_op->type() != schema::PrimitiveType_Transpose) { if (input_ready && out_op->type() != schema::PrimitiveType_Transpose) {
subgraph_ops.push_back(out_op);
(*is_visited)[out_op] = true;
op_queue.push(out_op); op_queue.push(out_op);
} else {
next_candidate_ops->push(out_op);
} }
} }
} }
return subgraph_ops; return subgraph_ops;
} }
// Breadth-first walk that starts at `head_op` and follows both producer (in_ops) and consumer
// (out_ops) links, but only across ops that are listed in `ready_ops`.
//
// head_op        op the walk starts from; it is collected unless it is already marked in `is_searched`.
// ready_ops      ops the walk is allowed to step onto.
// connected_ops  output; every op reached by the walk is appended in the order it is expanded.
// is_searched    in/out; an op is marked true when it is appended, and marked ops are never expanded again.
void FindConnectedOps(NPUOp *head_op, std::vector<NPUOp *> ready_ops, std::vector<NPUOp *> *connected_ops,
                      std::map<const NPUOp *, bool> *is_searched) {
  // A neighbour is worth queueing only if it is one of the ready ops and has not been collected yet.
  // The map is consulted only for ready ops, so no entries are created for ops outside `ready_ops`.
  auto should_enqueue = [&](NPUOp *neighbour) {
    const bool is_ready = std::find(ready_ops.begin(), ready_ops.end(), neighbour) != ready_ops.end();
    return is_ready && !(*is_searched)[neighbour];
  };

  std::queue<NPUOp *> pending;
  pending.push(head_op);
  while (!pending.empty()) {
    NPUOp *node = pending.front();
    pending.pop();
    // The same op can be queued more than once before it is expanded; later copies are dropped here.
    if ((*is_searched)[node]) {
      continue;
    }
    for (auto producer : node->in_ops()) {
      if (should_enqueue(producer)) {
        pending.push(producer);
      }
    }
    for (auto consumer : node->out_ops()) {
      if (should_enqueue(consumer)) {
        pending.push(consumer);
      }
    }
    (*is_searched)[node] = true;
    connected_ops->push_back(node);
  }
}
// Builds NPU subgraph kernels out of `ready_ops` and appends them to all_kernels_.
//
// Each not-yet-searched op popped from `valid_in_ops` seeds one subgraph made of the ready ops
// connected to it, so input ops that are connected to each other end up in the same subgraph
// and unconnected ones are split into separate subgraphs.
//
// valid_in_ops  queue of subgraph input ops; it is drained by this call.
// ready_ops     ops that may be placed into the subgraph(s) created by this call.
// is_searched   in/out; ops already assigned to a subgraph. Every op placed here is marked true.
//
// Returns RET_OK on success, RET_ERROR if a subgraph kernel could not be created.
int NPUGraph::CreateSubgraphFromReadyOps(std::queue<NPUOp *> *valid_in_ops, std::vector<NPUOp *> ready_ops,
                                         std::map<const NPUOp *, bool> *is_searched) {
  while (!valid_in_ops->empty()) {
    auto op = valid_in_ops->front();
    valid_in_ops->pop();
    // An input op already pulled into an earlier subgraph does not start a new one.
    if ((*is_searched)[op]) {
      continue;
    }
    std::vector<NPUOp *> connected_ops;
    if (valid_in_ops->empty()) {
      // The current op is the only input op left, so there is no need to confirm the connectivity:
      // all ready ops that are still unassigned go into its subgraph.
      for (auto ready_op : ready_ops) {
        if (!(*is_searched)[ready_op]) {
          connected_ops.push_back(ready_op);
          (*is_searched)[ready_op] = true;
        }
      }
    } else {
      // Other input ops are still queued, so use BFS to collect only the ready ops that are
      // connected to the current input op; the rest are left for the remaining input ops.
      FindConnectedOps(op, ready_ops, &connected_ops, is_searched);
    }
    auto subgraph_kernel = CreateNPUSubgraphKernel(connected_ops);
    if (subgraph_kernel == nullptr) {
      MS_LOG(DEBUG) << "Create NPU subgraph kernel failed.";
      return RET_ERROR;
    }
    all_kernels_.push_back(subgraph_kernel);
  }
  return RET_OK;
}
kernel::Kernel *NPUGraph::CreateNPUSubgraphKernel(std::vector<NPUOp *> npu_ops) { kernel::Kernel *NPUGraph::CreateNPUSubgraphKernel(std::vector<NPUOp *> npu_ops) {
auto subgraph = new (std::nothrow) NPUSubGraph(npu_ops, npu_manager_); auto subgraph = new (std::nothrow) NPUSubGraph(npu_ops, npu_manager_);
if (subgraph == nullptr) { if (subgraph == nullptr) {

View File

@ -18,6 +18,7 @@
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_NPU_NPU_GRAPH_H_ #define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_NPU_NPU_GRAPH_H_
#include <vector> #include <vector>
#include <queue>
#include <map> #include <map>
#include <utility> #include <utility>
#include "include/api/kernel.h" #include "include/api/kernel.h"
@ -59,7 +60,14 @@ class NPUGraph : public kernel::Kernel {
std::vector<NPUOp *> FindNextOps(NPUOp *cur_op); std::vector<NPUOp *> FindNextOps(NPUOp *cur_op);
std::vector<NPUOp *> FindSubgraphOps(NPUOp *head_op, std::map<const NPUOp *, bool> *is_visited); int FindValidSubgraphInOps(std::queue<NPUOp *> *valid_in_ops, std::queue<NPUOp *> *candidate_in_ops,
std::map<const NPUOp *, bool> *is_visited);
std::vector<NPUOp *> FindReadySubgraphOps(std::queue<NPUOp *> op_queue, std::queue<NPUOp *> *next_candidate_ops,
std::map<const NPUOp *, bool> *is_visited);
int CreateSubgraphFromReadyOps(std::queue<NPUOp *> *valid_in_ops, std::vector<NPUOp *> ready_ops,
std::map<const NPUOp *, bool> *is_searched);
kernel::Kernel *CreateNPUSubgraphKernel(std::vector<NPUOp *> ops); kernel::Kernel *CreateNPUSubgraphKernel(std::vector<NPUOp *> ops);

View File

@ -160,7 +160,7 @@ int NPUSubGraph::BuildNPUInputOp() {
for (int i = 0; i < op->inputs().size(); ++i) { for (int i = 0; i < op->inputs().size(); ++i) {
auto in_tensor = op->inputs()[i]; auto in_tensor = op->inputs()[i];
if (IsSubGraphInputTensor(in_tensor)) { if (IsSubGraphInputTensor(in_tensor)) {
auto tensor_name = op->name() + "_" + std::to_string(count++); auto tensor_name = "Input_" + std::to_string(count++) + '_' + op->name();
hiai::op::Data *data; hiai::op::Data *data;
data = ConverterToNPUData(in_tensor, tensor_name); data = ConverterToNPUData(in_tensor, tensor_name);
subgraph_input_ops_.push_back(*data); subgraph_input_ops_.push_back(*data);

View File

@ -125,3 +125,4 @@ ml_Heatmap_depth_240180;2
ml_Heatmap_depth_180240;2 ml_Heatmap_depth_180240;2
ml_video_edit_person_divison_video;2 ml_video_edit_person_divison_video;2
ml_video_edit_hair_dyeing_segmodel_v2 ml_video_edit_hair_dyeing_segmodel_v2
ml_video_edit_hairline_segmentation;3

View File

@ -134,3 +134,4 @@ hdc_ocr_recog_horizontal 0.5
ml_Heatmap_depth_240180;2 10 ml_Heatmap_depth_240180;2 10
ml_Heatmap_depth_180240;2 7 ml_Heatmap_depth_180240;2 7
ml_video_edit_hair_dyeing_segmodel_v2 1 ml_video_edit_hair_dyeing_segmodel_v2 1
ml_video_edit_hairline_segmentation;3 1.5

View File

@ -86,3 +86,4 @@ ml_video_edit_art_generate_20210513.onnx 0.5
ml_video_edit_art_transfer_20210513.onnx;3 0.5 ml_video_edit_art_transfer_20210513.onnx;3 0.5
ml_video_edit_hair_dyeing_segmodel_v2 0.5 ml_video_edit_hair_dyeing_segmodel_v2 0.5
ml_video_edit_makeup_mobilenetv203.onnx 2 ml_video_edit_makeup_mobilenetv203.onnx 2
ml_video_edit_hairline_segmentation;3 0.5