!32893 [MSLITE][Bug][Func] Adjust runtime shape fusion pass.

Merge pull request !32893 from wangshaocong/bugfix
This commit is contained in:
i-robot 2022-04-21 03:21:24 +00:00 committed by Gitee
commit bd318b94af
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
11 changed files with 56 additions and 64 deletions

View File

@ -114,6 +114,8 @@ set(LITE_SRC
${CMAKE_CURRENT_SOURCE_DIR}/runtime/inner_allocator.cc
${CMAKE_CURRENT_SOURCE_DIR}/runtime/runtime_allocator.cc
${CMAKE_CURRENT_SOURCE_DIR}/runtime/infer_manager.cc
${CMAKE_CURRENT_SOURCE_DIR}/runtime/runtime_shape_fusion_pass.cc
${CMAKE_CURRENT_SOURCE_DIR}/runtime/runtime_pass.cc
${CMAKE_CURRENT_SOURCE_DIR}/schema_tensor_wrapper.cc
${CMAKE_CURRENT_SOURCE_DIR}/tensor.cc
${CMAKE_CURRENT_SOURCE_DIR}/tensor_category.cc
@ -205,13 +207,6 @@ if(MSLITE_ENABLE_STRING_KERNEL)
${CMAKE_CURRENT_SOURCE_DIR}/common/string_util.cc
)
endif()
if(MSLITE_ENABLE_RUNTIME_PASS)
set(LITE_SRC
${LITE_SRC}
${CMAKE_CURRENT_SOURCE_DIR}/runtime/runtime_shape_fusion_pass.cc
${CMAKE_CURRENT_SOURCE_DIR}/runtime/runtime_pass.cc
)
endif()
if(MSLITE_ENABLE_CONTROLFLOW)
set(LITE_SRC

View File

@ -17,9 +17,7 @@
#include "src/lite_session.h"
#include <set>
#include "src/pack_weight_manager.h"
#ifndef RUNTIME_PASS_CLIP
#include "src/runtime/runtime_pass.h"
#endif
#if defined(LINUX_RUNTIME)
#include <malloc.h>
#endif
@ -1262,13 +1260,12 @@ int LiteSession::Resize(const std::vector<mindspore::tensor::MSTensor *> &inputs
is_running_.store(false);
return RET_ERROR;
}
#ifndef RUNTIME_PASS_CLIP
auto status = GraphOptimizePass(&kernels_);
if (status != RET_OK) {
MS_LOG(ERROR) << "GraphOptimizePass failed.";
return RET_ERROR;
}
#endif
is_running_.store(false);
#if defined(LINUX_RUNTIME)

View File

@ -15,6 +15,7 @@
*/
#include "src/runtime/kernel/cpu/fp32/shape_fusion_fp32.h"
#include <algorithm>
#include "src/kernel_registry.h"
#include "include/errorcode.h"
#include "src/common/log_adapter.h"
@ -34,11 +35,12 @@ int ShapeFusionCPUKernel::Prepare() {
// ReSize: re-runs shape inference so the fused shape outputs track the
// current (possibly resized) input tensors.
int ShapeFusionCPUKernel::ReSize() { return KernelInferShape(in_tensors_, out_tensors_, op_parameter_); }
// Run: if every output tensor is already constant there is nothing left to
// compute (the values were presumably folded in by the shape-fusion pass —
// confirm against runtime_shape_fusion_pass.cc), so return immediately;
// otherwise fall back to KernelInferShape to (re)compute the outputs.
// When the delegate feature is clipped out (DELEGATE_CLIP defined), Run is
// a no-op that reports success.
int ShapeFusionCPUKernel::Run() {
#ifndef DELEGATE_CLIP
// Outputs already hold constant data — skip recomputation.
bool is_const =
std::all_of(out_tensors_.begin(), out_tensors_.end(), [](lite::Tensor *tensor) { return tensor->IsConst(); });
if (is_const) {
return RET_OK;
}
return KernelInferShape(in_tensors_, out_tensors_, op_parameter_);
#else
return RET_OK;
#endif
}
REG_KERNEL(kCPU, kNumberTypeInt32, PrimType_Inner_ShapeFusion, LiteKernelCreator<ShapeFusionCPUKernel>)

View File

@ -17,11 +17,12 @@
#include "src/runtime/runtime_pass.h"
#include "nnacl/conv_parameter.h"
namespace mindspore::lite {
#ifndef RUNTIME_PASS_CLIP
namespace {
const constexpr int kMaxDepth = 2048;
}
namespace mindspore::lite {
void Nc4hw4PassReplace(std::vector<kernel::KernelExec *> *kernels, std::vector<Tensor *> *tensors, size_t index) {
kernel::KernelExec *conv_kernel = kernels->at(index);
kernel::KernelExec *transpose_kernel = conv_kernel->out_kernels().front();
@ -287,8 +288,10 @@ STATUS DeleteRedundantTrans(std::vector<kernel::KernelExec *> *kernels) {
}
return RET_OK;
}
#endif
STATUS RuntimePass(std::vector<kernel::KernelExec *> *subgraphs, std::vector<Tensor *> *tensors) {
#ifndef RUNTIME_PASS_CLIP
for (auto subgraph : *subgraphs) {
auto sub = reinterpret_cast<kernel::SubGraphKernel *>(subgraph);
if (RuntimePassValid(sub) == false) {
@ -305,10 +308,12 @@ STATUS RuntimePass(std::vector<kernel::KernelExec *> *subgraphs, std::vector<Ten
return RET_ERROR;
}
}
#endif
return RET_OK;
}
STATUS GraphOptimizePass(std::vector<kernel::KernelExec *> *sub_graphs) {
#ifndef RUNTIME_PASS_CLIP
for (auto subgraph : *sub_graphs) {
auto sub_graph = reinterpret_cast<kernel::SubGraphKernel *>(subgraph);
if (RuntimePassValid(sub_graph) == false) {
@ -321,6 +326,7 @@ STATUS GraphOptimizePass(std::vector<kernel::KernelExec *> *sub_graphs) {
return RET_ERROR;
}
}
#endif
return RET_OK;
}
} // namespace mindspore::lite

View File

@ -17,7 +17,6 @@
#ifndef MINDSPORE_LITE_SRC_RUNTIME_RUNTIME_PASS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_RUNTIME_PASS_H_
#ifndef RUNTIME_PASS_CLIP
#include <vector>
#include "src/kernel_exec.h"
#include "src/sub_graph_kernel.h"
@ -25,9 +24,9 @@
#include "schema/model_generated.h"
namespace mindspore::lite {
STATUS RuntimePass(std::vector<kernel::KernelExec *> *subgraphs, std::vector<Tensor *> *tensors);
STATUS GraphOptimizePass(std::vector<kernel::KernelExec *> *sub_graphs);
#ifndef RUNTIME_PASS_CLIP
/* Nc4hw4 PASS
* before : --(nhwc)-- CONV --(nhwc)-- TRANSPOSE --(nchw)-- IN --(nchw)-- TRANSPOSE --(nhwc)--
* after : --(nhwc)-- CONV --(nc4hw4)-- IN --(nhwc)--
@ -47,7 +46,6 @@ static const std::vector<schema::PrimitiveType> Nc4hw4FormatInOpList = {schema::
static const schema::PrimitiveType ConvNormC4OpConv2DFusion = schema::PrimitiveType_Conv2DFusion;
static const schema::PrimitiveType ConvNormC4OpActivation = schema::PrimitiveType_Activation;
static const schema::PrimitiveType ConvNormC4OpInstanceNorm = schema::PrimitiveType_InstanceNorm;
} // namespace mindspore::lite
#endif
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_RUNTIME_PASS_H_

View File

@ -14,6 +14,7 @@
* limitations under the License.
*/
#ifndef RUNTIME_PASS_CLIP
#include "src/runtime/runtime_shape_fusion_pass.h"
#include <set>
#include <queue>
@ -21,13 +22,11 @@
#include "include/errorcode.h"
#include "src/common/log_adapter.h"
#include "nnacl/op_base.h"
#include "schema/inner/model_generated.h"
namespace mindspore::lite {
namespace {
constexpr size_t kInitialSize = 1024;
} // namespace
namespace mindspore::lite {
int ShapeFusionPass::ConvertToShapeFusion(Model::Node *node) {
MS_ASSERT(node != nullptr);
auto input_tensor = src_tensors_->at(node->input_indices_.front());
@ -382,3 +381,4 @@ int ShapeFusionPass::GetFusionMatrixFromConstantTensor(const lite::Tensor *tenso
return RET_OK;
}
} // namespace mindspore::lite
#endif

View File

@ -17,15 +17,16 @@
#ifndef MINDSPORE_LITE_SRC_RUNTIME_RUNTIME_SHAPE_FUSION_PASS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_RUNTIME_SHAPE_FUSION_PASS_H_
#ifndef RUNTIME_PASS_CLIP
#include <map>
#include <vector>
#include <algorithm>
#include "src/lite_model.h"
#include "src/common/tensor_util.h"
#include "schema/ops_generated.h"
#include "schema/model_generated.h"
namespace mindspore::lite {
#ifndef RUNTIME_PASS_CLIP
struct ShapeFusionMatrix {
ShapeFusionMatrix() {}
explicit ShapeFusionMatrix(size_t dim) {
@ -78,6 +79,7 @@ struct ShapeFusionMatrix {
std::vector<std::vector<float>> shape_matrix;
bool scalar = false;
};
#endif
class ShapeFusionPass {
public:
@ -92,10 +94,31 @@ class ShapeFusionPass {
}
~ShapeFusionPass() = default;
int ConvertToShapeFusion(Model::Node *node);
int FusePostNodes(Model::Node *node, size_t subgraph_index);
// Runs the shape-fusion pass on a single node: first tries to convert it to
// the built-in fused-shape op, then fuses eligible downstream nodes into it.
// Both failures are non-fatal — they are logged as warnings and the node is
// simply left unfused. In every case the node's output tensors are recorded
// in shape_fusion_outputs_ so their data can be released later by
// FreeOutputTensorDataOfFusedShape(). Compiles to a no-op when
// RUNTIME_PASS_CLIP is defined.
void Run(Model::Node *node, size_t subgraph_index) {
#ifndef RUNTIME_PASS_CLIP
if (ConvertToShapeFusion(node) != RET_OK) {
MS_LOG(WARNING) << "Convert to built-in shape failed: " << node->name_;
} else if (FusePostNodes(node, subgraph_index) != RET_OK) {
MS_LOG(WARNING) << "Fused to built-in shape failed: " << node->name_;
}
// Remember the (possibly fused) node's output tensors for later cleanup.
std::transform(node->output_indices_.begin(), node->output_indices_.end(),
std::back_inserter(shape_fusion_outputs_),
[&](uint32_t idx) { return this->src_tensors_->at(idx); });
#endif
}
// Frees the data buffers of all tensors produced by fused shape nodes and
// re-marks them as variable (VAR) — presumably so later scheduling stages
// stop treating them as constants (TODO confirm against scheduler usage).
// Compiles to a no-op when RUNTIME_PASS_CLIP is defined.
void FreeOutputTensorDataOfFusedShape() {
#if !defined(RUNTIME_PASS_CLIP)
for (auto tensor : shape_fusion_outputs_) {
tensor->FreeData();
tensor->set_category(VAR);
}
#endif
}
private:
#ifndef RUNTIME_PASS_CLIP
int ConvertToShapeFusion(Model::Node *node);
int FusePostNodes(Model::Node *node, size_t subgraph_index);
Tensor *BuildTensorFromShapeFusionMatrix(const ShapeFusionMatrix &shape_fusion_matrix);
bool CheckCanFused(const Model::Node *shape_fusion, const Model::Node *post_node, uint32_t input_idx,
size_t subgraph_index);
@ -108,12 +131,13 @@ class ShapeFusionPass {
ShapeFusionMatrix *constant_matrix);
private:
std::map<uint32_t, ShapeFusionMatrix> shape_fusion_matrices_;
std::vector<lite::Tensor *> shape_fusion_outputs_;
#endif
LiteModel *lite_model_ = nullptr;
const std::vector<Model::Node *> *all_nodes_ = nullptr;
std::vector<lite::Tensor *> *src_tensors_ = nullptr;
std::map<uint32_t, std::vector<Model::Node *>> used_nodes_;
std::map<uint32_t, ShapeFusionMatrix> shape_fusion_matrices_;
};
} // namespace mindspore::lite
#endif
#endif // MINDSPORE_LITE_SRC_RUNTIME_RUNTIME_SHAPE_FUSION_PASS_H_

View File

@ -39,9 +39,7 @@
#include "src/common/tensor_util.h"
#include "src/common/context_util.h"
#include "src/runtime/infer_manager.h"
#ifndef RUNTIME_PASS_CLIP
#include "src/runtime/runtime_pass.h"
#endif
#ifndef AUTO_PARALLEL_CLIP
#include "src/sub_graph_split.h"
#endif
@ -259,7 +257,6 @@ int Scheduler::SchedulePreProcess() {
schema_version_ = reinterpret_cast<LiteModel *>(src_model_)->GetSchemaVersion();
this->graph_output_node_indexes_ = GetGraphOutputNodes(src_model_);
*is_infershape_ = InferSubGraphShape(kMainSubGraphIndex);
if (*is_infershape_ != RET_OK && *is_infershape_ != RET_INFER_INVALID) {
MS_LOG(ERROR) << "op infer shape failed.";
@ -397,10 +394,9 @@ int Scheduler::Schedule(std::vector<kernel::KernelExec *> *dst_kernels) {
MS_LOG(ERROR) << "CheckInputParam failed! ret: " << check_input_ret;
return check_input_ret;
}
#ifndef RUNTIME_PASS_CLIP
shape_fusion_pass_ = std::make_shared<ShapeFusionPass>(reinterpret_cast<LiteModel *>(src_model_), src_tensors_);
#endif
shape_fusion_pass_ = std::make_shared<ShapeFusionPass>(reinterpret_cast<LiteModel *>(src_model_), src_tensors_);
MS_CHECK_TRUE_RET(shape_fusion_pass_ != nullptr, RET_ERROR);
int ret = SchedulePreProcess();
if (ret != RET_OK) {
return ret;
@ -428,15 +424,9 @@ int Scheduler::Schedule(std::vector<kernel::KernelExec *> *dst_kernels) {
return ret;
}
}
shape_fusion_pass_->FreeOutputTensorDataOfFusedShape();
#ifndef DELEGATE_CLIP
#ifndef RUNTIME_PASS_CLIP
// Free the output tensor data of shape fusion.
for (auto tensor : shape_fusion_outputs_) {
tensor->FreeData();
tensor->set_category(VAR);
}
#endif
ret = InitDelegateKernels(dst_kernels);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Repalce delegate kernels failed.";
@ -467,13 +457,11 @@ int Scheduler::Schedule(std::vector<kernel::KernelExec *> *dst_kernels) {
}
#endif
#ifndef RUNTIME_PASS_CLIP
auto status = RuntimePass(dst_kernels, src_tensors_);
if (status != RET_OK) {
MS_LOG(ERROR) << "runtime pass failed.";
return RET_ERROR;
}
#endif
ret = InitKernels(std::move(*dst_kernels));
if (ret != RET_OK) {
@ -742,11 +730,6 @@ int Scheduler::InferNodeShape(const lite::Model::Node *node) {
return RET_ERROR;
}
}
#if !defined(RUNTIME_PASS_CLIP) && !defined(DELEGATE_CLIP)
if (node->node_type_ == PrimType_Inner_ShapeFusion) {
shape_fusion_outputs_.insert(shape_fusion_outputs_.end(), outputs.begin(), outputs.end());
}
#endif
} else if (ret != RET_INFER_INVALID) {
FreeOpParameters();
return RET_ERROR;
@ -877,17 +860,11 @@ int Scheduler::InferSubGraphShape(size_t subgraph_index) {
MS_LOG(ERROR) << "Op " << node->name_ << " should exist in model!";
return RET_ERROR;
}
#ifndef RUNTIME_PASS_CLIP
if (node->node_type_ == schema::PrimitiveType_Shape) {
// convert shape to built-in shape
MS_CHECK_TRUE_RET(node->input_indices_.size() == 1, RET_ERROR);
if (shape_fusion_pass_->ConvertToShapeFusion(node) != RET_OK) {
MS_LOG(WARNING) << "Convert to built-in shape failed: " << node->name_;
} else if (shape_fusion_pass_->FusePostNodes(node, subgraph_index) != RET_OK) {
MS_LOG(WARNING) << "Fused to built-in shape failed: " << node->name_;
}
shape_fusion_pass_->Run(node, subgraph_index);
}
#endif
auto ret = InferNodeShape(node);
if (ret == RET_INFER_INVALID) {
MS_LOG(INFO) << "InferShape interrupted, name: " << node->name_

View File

@ -35,9 +35,7 @@
#ifndef CONTROLFLOW_TENSORLIST_CLIP
#include "src/control_flow/control_flow_scheduler.h"
#endif
#ifndef RUNTIME_PASS_CLIP
#include "src/runtime/runtime_shape_fusion_pass.h"
#endif
namespace mindspore::lite {
constexpr int kDefaultDeviceType = -1;
@ -180,12 +178,7 @@ class Scheduler {
int schema_version_ = SCHEMA_VERSION::SCHEMA_CUR;
std::map<std::string, TypeId> *execution_plan_ = nullptr;
const std::map<std::string, std::map<std::string, std::string>> *config_info_ = nullptr;
#ifndef RUNTIME_PASS_CLIP
std::shared_ptr<ShapeFusionPass> shape_fusion_pass_ = nullptr;
#ifndef DELEGATE_CLIP
std::vector<Tensor *> shape_fusion_outputs_;
#endif
#endif
};
} // namespace mindspore::lite

View File

@ -1 +1 @@
856848
858546

View File

@ -1 +1 @@
856848
858546