forked from mindspore-Ecosystem/mindspore
!32893 [MSLITE][Bug][Func] Adjust runtime shape fusion pass.
Merge pull request !32893 from wangshaocong/bugfix
Commit: bd318b94af
@@ -114,6 +114,8 @@ set(LITE_SRC
     ${CMAKE_CURRENT_SOURCE_DIR}/runtime/inner_allocator.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/runtime/runtime_allocator.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/runtime/infer_manager.cc
+    ${CMAKE_CURRENT_SOURCE_DIR}/runtime/runtime_shape_fusion_pass.cc
+    ${CMAKE_CURRENT_SOURCE_DIR}/runtime/runtime_pass.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/schema_tensor_wrapper.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/tensor.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/tensor_category.cc
@@ -205,13 +207,6 @@ if(MSLITE_ENABLE_STRING_KERNEL)
         ${CMAKE_CURRENT_SOURCE_DIR}/common/string_util.cc
     )
 endif()
-if(MSLITE_ENABLE_RUNTIME_PASS)
-    set(LITE_SRC
-        ${LITE_SRC}
-        ${CMAKE_CURRENT_SOURCE_DIR}/runtime/runtime_shape_fusion_pass.cc
-        ${CMAKE_CURRENT_SOURCE_DIR}/runtime/runtime_pass.cc
-    )
-endif()
 
 if(MSLITE_ENABLE_CONTROLFLOW)
     set(LITE_SRC
@@ -17,9 +17,7 @@
 #include "src/lite_session.h"
 #include <set>
 #include "src/pack_weight_manager.h"
-#ifndef RUNTIME_PASS_CLIP
 #include "src/runtime/runtime_pass.h"
-#endif
 #if defined(LINUX_RUNTIME)
 #include <malloc.h>
 #endif
@@ -1262,13 +1260,12 @@ int LiteSession::Resize(const std::vector<mindspore::tensor::MSTensor *> &inputs
    is_running_.store(false);
    return RET_ERROR;
  }
#ifndef RUNTIME_PASS_CLIP

  auto status = GraphOptimizePass(&kernels_);
  if (status != RET_OK) {
    MS_LOG(ERROR) << "GraphOptimizePass failed.";
    return RET_ERROR;
  }
#endif

  is_running_.store(false);
#if defined(LINUX_RUNTIME)
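Note: with this change, Resize() re-runs GraphOptimizePass over the session's kernels once the new input shapes are set, presumably so that previously fused shape kernels are re-folded for the new shapes. Below is a minimal, self-contained sketch of the guarded call pattern; Status, KernelExec, and the pass function here are simplified stand-ins for the real MSLite types, not the actual API.

#include <vector>

// Stand-ins so the sketch compiles on its own (hypothetical, simplified).
enum Status { RET_OK = 0, RET_ERROR = 1 };
struct KernelExec {};
static Status GraphOptimizePass(std::vector<KernelExec *> *kernels) {
  (void)kernels;  // the real pass rewrites the kernel list in place
  return RET_OK;
}

// Pattern used by Resize(): after shapes change, re-run the graph-level
// optimize pass, but only in builds that keep the runtime passes
// (RUNTIME_PASS_CLIP not defined); otherwise the block compiles away.
Status ResizeTail(std::vector<KernelExec *> *kernels) {
#ifndef RUNTIME_PASS_CLIP
  if (GraphOptimizePass(kernels) != RET_OK) {
    return RET_ERROR;  // surface the pass failure to the caller
  }
#endif
  return RET_OK;
}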
@@ -15,6 +15,7 @@
  */
 
 #include "src/runtime/kernel/cpu/fp32/shape_fusion_fp32.h"
+#include <algorithm>
 #include "src/kernel_registry.h"
 #include "include/errorcode.h"
 #include "src/common/log_adapter.h"
@@ -34,11 +35,12 @@ int ShapeFusionCPUKernel::Prepare() {
int ShapeFusionCPUKernel::ReSize() { return KernelInferShape(in_tensors_, out_tensors_, op_parameter_); }

int ShapeFusionCPUKernel::Run() {
#ifndef DELEGATE_CLIP
  bool is_const =
    std::all_of(out_tensors_.begin(), out_tensors_.end(), [](lite::Tensor *tensor) { return tensor->IsConst(); });
  if (is_const) {
    return RET_OK;
  }
  return KernelInferShape(in_tensors_, out_tensors_, op_parameter_);
#else
  return RET_OK;
#endif
}

REG_KERNEL(kCPU, kNumberTypeInt32, PrimType_Inner_ShapeFusion, LiteKernelCreator<ShapeFusionCPUKernel>)
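Note: Run() above becomes a no-op when every output tensor is already constant, which is the case when the fused shape computation was fully folded during scheduling; only non-constant outputs still go through KernelInferShape at run time. A minimal sketch of the same check, with Tensor as a hypothetical stand-in for lite::Tensor:

#include <algorithm>
#include <vector>

struct Tensor {  // hypothetical stand-in for lite::Tensor
  bool const_flag = false;
  bool IsConst() const { return const_flag; }
};

// True when every output was folded to a constant at schedule time,
// in which case the kernel's Run() has nothing left to compute.
bool AllOutputsConst(const std::vector<Tensor *> &out_tensors) {
  return std::all_of(out_tensors.begin(), out_tensors.end(),
                     [](const Tensor *t) { return t->IsConst(); });
}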
@@ -17,11 +17,12 @@
 #include "src/runtime/runtime_pass.h"
 #include "nnacl/conv_parameter.h"
 
+namespace mindspore::lite {
+#ifndef RUNTIME_PASS_CLIP
 namespace {
 const constexpr int kMaxDepth = 2048;
 }
 
-namespace mindspore::lite {
 void Nc4hw4PassReplace(std::vector<kernel::KernelExec *> *kernels, std::vector<Tensor *> *tensors, size_t index) {
   kernel::KernelExec *conv_kernel = kernels->at(index);
   kernel::KernelExec *transpose_kernel = conv_kernel->out_kernels().front();
@@ -287,8 +288,10 @@ STATUS DeleteRedundantTrans(std::vector<kernel::KernelExec *> *kernels) {
   }
   return RET_OK;
 }
+#endif
 
 STATUS RuntimePass(std::vector<kernel::KernelExec *> *subgraphs, std::vector<Tensor *> *tensors) {
+#ifndef RUNTIME_PASS_CLIP
   for (auto subgraph : *subgraphs) {
     auto sub = reinterpret_cast<kernel::SubGraphKernel *>(subgraph);
     if (RuntimePassValid(sub) == false) {
@@ -305,10 +308,12 @@ STATUS RuntimePass(std::vector<kernel::KernelExec *> *subgraphs, std::vector<Ten
       return RET_ERROR;
     }
   }
+#endif
   return RET_OK;
 }
 
 STATUS GraphOptimizePass(std::vector<kernel::KernelExec *> *sub_graphs) {
+#ifndef RUNTIME_PASS_CLIP
   for (auto subgraph : *sub_graphs) {
     auto sub_graph = reinterpret_cast<kernel::SubGraphKernel *>(subgraph);
     if (RuntimePassValid(sub_graph) == false) {
@@ -321,6 +326,7 @@ STATUS GraphOptimizePass(std::vector<kernel::KernelExec *> *sub_graphs) {
       return RET_ERROR;
     }
   }
+#endif
   return RET_OK;
 }
 } // namespace mindspore::lite
@@ -17,7 +17,6 @@
 #ifndef MINDSPORE_LITE_SRC_RUNTIME_RUNTIME_PASS_H_
 #define MINDSPORE_LITE_SRC_RUNTIME_RUNTIME_PASS_H_
 
-#ifndef RUNTIME_PASS_CLIP
 #include <vector>
 #include "src/kernel_exec.h"
 #include "src/sub_graph_kernel.h"
@@ -25,9 +24,9 @@
#include "schema/model_generated.h"

namespace mindspore::lite {

STATUS RuntimePass(std::vector<kernel::KernelExec *> *subgraphs, std::vector<Tensor *> *tensors);
STATUS GraphOptimizePass(std::vector<kernel::KernelExec *> *sub_graphs);
#ifndef RUNTIME_PASS_CLIP
/* Nc4hw4 PASS
 * before : --(nhwc)-- CONV --(nhwc)-- TRANSPOSE --(nchw)-- IN --(nchw)-- TRANSPOSE --(nhwc)--
 * after : --(nhwc)-- CONV --(nc4hw4)-- IN --(nhwc)--
@@ -47,7 +46,6 @@ static const std::vector<schema::PrimitiveType> Nc4hw4FormatInOpList = {schema::
static const schema::PrimitiveType ConvNormC4OpConv2DFusion = schema::PrimitiveType_Conv2DFusion;
static const schema::PrimitiveType ConvNormC4OpActivation = schema::PrimitiveType_Activation;
static const schema::PrimitiveType ConvNormC4OpInstanceNorm = schema::PrimitiveType_InstanceNorm;

} // namespace mindspore::lite
#endif
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_RUNTIME_PASS_H_
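Note: together with the CMakeLists change, this is the core of the refactor: runtime_pass.cc and runtime_shape_fusion_pass.cc are now always compiled, RuntimePass and GraphOptimizePass are declared outside the RUNTIME_PASS_CLIP guard, and in clipped builds their bodies reduce to returning RET_OK, so callers such as LiteSession::Resize and Scheduler::Schedule need no #ifdef of their own. A hedged, self-contained sketch of the pattern (names simplified, not the real signatures):

#include <vector>

enum STATUS { RET_OK = 0, RET_ERROR = 1 };  // stand-in status codes
struct KernelExec {};                       // stand-in for kernel::KernelExec

// Declared and defined in every build flavor, so call sites always link.
STATUS RuntimePassSketch(std::vector<KernelExec *> *subgraphs) {
#ifndef RUNTIME_PASS_CLIP
  // Full build: walk the subgraphs and apply the rewrites.
  for (KernelExec *subgraph : *subgraphs) {
    (void)subgraph;  // the real pass validates and rewrites each subgraph
  }
#endif
  // Clipped build: the body above compiles away and this is a no-op.
  return RET_OK;
}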
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#ifndef RUNTIME_PASS_CLIP
 #include "src/runtime/runtime_shape_fusion_pass.h"
 #include <set>
 #include <queue>
@@ -21,13 +22,11 @@
#include "include/errorcode.h"
#include "src/common/log_adapter.h"
#include "nnacl/op_base.h"
#include "schema/inner/model_generated.h"

namespace mindspore::lite {
namespace {
constexpr size_t kInitialSize = 1024;
} // namespace

namespace mindspore::lite {
int ShapeFusionPass::ConvertToShapeFusion(Model::Node *node) {
  MS_ASSERT(node != nullptr);
  auto input_tensor = src_tensors_->at(node->input_indices_.front());
@@ -382,3 +381,4 @@ int ShapeFusionPass::GetFusionMatrixFromConstantTensor(const lite::Tensor *tenso
   return RET_OK;
 }
 } // namespace mindspore::lite
+#endif
@@ -17,15 +17,16 @@
 #ifndef MINDSPORE_LITE_SRC_RUNTIME_RUNTIME_SHAPE_FUSION_PASS_H_
 #define MINDSPORE_LITE_SRC_RUNTIME_RUNTIME_SHAPE_FUSION_PASS_H_
 
-#ifndef RUNTIME_PASS_CLIP
 #include <map>
 #include <vector>
+#include <algorithm>
 #include "src/lite_model.h"
 #include "src/common/tensor_util.h"
 #include "schema/ops_generated.h"
 #include "schema/model_generated.h"
 
 namespace mindspore::lite {
+#ifndef RUNTIME_PASS_CLIP
 struct ShapeFusionMatrix {
   ShapeFusionMatrix() {}
   explicit ShapeFusionMatrix(size_t dim) {
@@ -78,6 +79,7 @@ struct ShapeFusionMatrix {
   std::vector<std::vector<float>> shape_matrix;
   bool scalar = false;
 };
+#endif
 
 class ShapeFusionPass {
  public:
@@ -92,10 +94,31 @@ class ShapeFusionPass {
   }
   ~ShapeFusionPass() = default;
 
-  int ConvertToShapeFusion(Model::Node *node);
-  int FusePostNodes(Model::Node *node, size_t subgraph_index);
+  void Run(Model::Node *node, size_t subgraph_index) {
+#ifndef RUNTIME_PASS_CLIP
+    if (ConvertToShapeFusion(node) != RET_OK) {
+      MS_LOG(WARNING) << "Convert to built-in shape failed: " << node->name_;
+    } else if (FusePostNodes(node, subgraph_index) != RET_OK) {
+      MS_LOG(WARNING) << "Fused to built-in shape failed: " << node->name_;
+    }
+    std::transform(node->output_indices_.begin(), node->output_indices_.end(),
+                   std::back_inserter(shape_fusion_outputs_),
+                   [&](uint32_t idx) { return this->src_tensors_->at(idx); });
+#endif
+  }
+  void FreeOutputTensorDataOfFusedShape() {
+#if !defined(RUNTIME_PASS_CLIP)
+    for (auto tensor : shape_fusion_outputs_) {
+      tensor->FreeData();
+      tensor->set_category(VAR);
+    }
+#endif
+  }
 
  private:
+#ifndef RUNTIME_PASS_CLIP
+  int ConvertToShapeFusion(Model::Node *node);
+  int FusePostNodes(Model::Node *node, size_t subgraph_index);
   Tensor *BuildTensorFromShapeFusionMatrix(const ShapeFusionMatrix &shape_fusion_matrix);
   bool CheckCanFused(const Model::Node *shape_fusion, const Model::Node *post_node, uint32_t input_idx,
                      size_t subgraph_index);
@@ -108,12 +131,13 @@ class ShapeFusionPass {
                      ShapeFusionMatrix *constant_matrix);
 
  private:
+  std::map<uint32_t, ShapeFusionMatrix> shape_fusion_matrices_;
+  std::vector<lite::Tensor *> shape_fusion_outputs_;
+#endif
   LiteModel *lite_model_ = nullptr;
   const std::vector<Model::Node *> *all_nodes_ = nullptr;
   std::vector<lite::Tensor *> *src_tensors_ = nullptr;
   std::map<uint32_t, std::vector<Model::Node *>> used_nodes_;
-  std::map<uint32_t, ShapeFusionMatrix> shape_fusion_matrices_;
 };
 } // namespace mindspore::lite
-#endif
 #endif // MINDSPORE_LITE_SRC_RUNTIME_RUNTIME_SHAPE_FUSION_PASS_H_
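Note: the header now funnels ConvertToShapeFusion and FusePostNodes through a single public Run() entry point and moves the shape_fusion_outputs_ list into the pass itself, so FreeOutputTensorDataOfFusedShape() can release the folded shape data without the scheduler tracking those tensors. A simplified, runnable sketch of that ownership (Tensor and the pass class are hypothetical stand-ins):

#include <vector>

struct Tensor {  // stand-in: the real code also resets the tensor category
  std::vector<int> data{1, 2, 3};
  void FreeData() { data.clear(); }
};

class ShapeFusionPassSketch {
 public:
  // Run() records every output tensor produced by a fused shape node.
  void Run(Tensor *output) { shape_fusion_outputs_.push_back(output); }
  // Called once after scheduling: release the folded shape data.
  void FreeOutputTensorDataOfFusedShape() {
    for (Tensor *t : shape_fusion_outputs_) t->FreeData();
  }

 private:
  std::vector<Tensor *> shape_fusion_outputs_;  // now owned by the pass
};

int main() {
  Tensor out;
  ShapeFusionPassSketch pass;
  pass.Run(&out);                           // during shape inference
  pass.FreeOutputTensorDataOfFusedShape();  // after kernels are scheduled
  return 0;
}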
@@ -39,9 +39,7 @@
 #include "src/common/tensor_util.h"
 #include "src/common/context_util.h"
 #include "src/runtime/infer_manager.h"
-#ifndef RUNTIME_PASS_CLIP
 #include "src/runtime/runtime_pass.h"
-#endif
 #ifndef AUTO_PARALLEL_CLIP
 #include "src/sub_graph_split.h"
 #endif
@@ -259,7 +257,6 @@ int Scheduler::SchedulePreProcess() {
  schema_version_ = reinterpret_cast<LiteModel *>(src_model_)->GetSchemaVersion();

  this->graph_output_node_indexes_ = GetGraphOutputNodes(src_model_);

  *is_infershape_ = InferSubGraphShape(kMainSubGraphIndex);
  if (*is_infershape_ != RET_OK && *is_infershape_ != RET_INFER_INVALID) {
    MS_LOG(ERROR) << "op infer shape failed.";
@@ -397,10 +394,9 @@ int Scheduler::Schedule(std::vector<kernel::KernelExec *> *dst_kernels) {
     MS_LOG(ERROR) << "CheckInputParam failed! ret: " << check_input_ret;
     return check_input_ret;
   }
-#ifndef RUNTIME_PASS_CLIP
-  shape_fusion_pass_ = std::make_shared<ShapeFusionPass>(reinterpret_cast<LiteModel *>(src_model_), src_tensors_);
-#endif
 
+  shape_fusion_pass_ = std::make_shared<ShapeFusionPass>(reinterpret_cast<LiteModel *>(src_model_), src_tensors_);
+  MS_CHECK_TRUE_RET(shape_fusion_pass_ != nullptr, RET_ERROR);
   int ret = SchedulePreProcess();
   if (ret != RET_OK) {
     return ret;
@@ -428,15 +424,9 @@ int Scheduler::Schedule(std::vector<kernel::KernelExec *> *dst_kernels) {
       return ret;
     }
   }
+  shape_fusion_pass_->FreeOutputTensorDataOfFusedShape();
 
 #ifndef DELEGATE_CLIP
-#ifndef RUNTIME_PASS_CLIP
-  // Free the output tensor data of shape fusion.
-  for (auto tensor : shape_fusion_outputs_) {
-    tensor->FreeData();
-    tensor->set_category(VAR);
-  }
-#endif
   ret = InitDelegateKernels(dst_kernels);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Repalce delegate kernels failed.";
@@ -467,13 +457,11 @@ int Scheduler::Schedule(std::vector<kernel::KernelExec *> *dst_kernels) {
   }
 #endif
 
-#ifndef RUNTIME_PASS_CLIP
   auto status = RuntimePass(dst_kernels, src_tensors_);
   if (status != RET_OK) {
     MS_LOG(ERROR) << "runtime pass failed.";
     return RET_ERROR;
   }
-#endif
 
   ret = InitKernels(std::move(*dst_kernels));
   if (ret != RET_OK) {
@@ -742,11 +730,6 @@ int Scheduler::InferNodeShape(const lite::Model::Node *node) {
       return RET_ERROR;
     }
   }
-#if !defined(RUNTIME_PASS_CLIP) && !defined(DELEGATE_CLIP)
-  if (node->node_type_ == PrimType_Inner_ShapeFusion) {
-    shape_fusion_outputs_.insert(shape_fusion_outputs_.end(), outputs.begin(), outputs.end());
-  }
-#endif
 } else if (ret != RET_INFER_INVALID) {
   FreeOpParameters();
   return RET_ERROR;
@@ -877,17 +860,11 @@ int Scheduler::InferSubGraphShape(size_t subgraph_index) {
       MS_LOG(ERROR) << "Op " << node->name_ << " should exist in model!";
       return RET_ERROR;
     }
-#ifndef RUNTIME_PASS_CLIP
     if (node->node_type_ == schema::PrimitiveType_Shape) {
       // convert shape to built-in shape
       MS_CHECK_TRUE_RET(node->input_indices_.size() == 1, RET_ERROR);
-      if (shape_fusion_pass_->ConvertToShapeFusion(node) != RET_OK) {
-        MS_LOG(WARNING) << "Convert to built-in shape failed: " << node->name_;
-      } else if (shape_fusion_pass_->FusePostNodes(node, subgraph_index) != RET_OK) {
-        MS_LOG(WARNING) << "Fused to built-in shape failed: " << node->name_;
-      }
+      shape_fusion_pass_->Run(node, subgraph_index);
     }
-#endif
     auto ret = InferNodeShape(node);
     if (ret == RET_INFER_INVALID) {
       MS_LOG(INFO) << "InferShape interrupted, name: " << node->name_
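Note: after these scheduler changes the shape-fusion flow is three unconditional steps: create the pass, call Run() for each Shape node during InferSubGraphShape, and free the fused outputs once kernels are scheduled; all RUNTIME_PASS_CLIP handling now lives inside the pass. A hedged outline of that control flow with simplified stand-in types:

#include <memory>
#include <vector>

enum Status { RET_OK = 0, RET_ERROR = 1 };  // stand-in status codes
struct Node {};                             // stand-in for Model::Node
class ShapeFusionPass {                     // stand-in for the real pass
 public:
  void Run(Node *node, size_t subgraph_index) { (void)node; (void)subgraph_index; }
  void FreeOutputTensorDataOfFusedShape() {}
};

Status ScheduleSketch(const std::vector<Node *> &shape_nodes) {
  // 1. Create the pass unconditionally; clipping is handled inside it.
  auto pass = std::make_shared<ShapeFusionPass>();
  if (pass == nullptr) return RET_ERROR;  // mirrors MS_CHECK_TRUE_RET
  // 2. During shape inference, every Shape node takes one entry point.
  for (Node *node : shape_nodes) {
    pass->Run(node, /*subgraph_index=*/0);
  }
  // 3. After kernels are scheduled, release the folded output data.
  pass->FreeOutputTensorDataOfFusedShape();
  return RET_OK;
}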
@@ -35,9 +35,7 @@
 #ifndef CONTROLFLOW_TENSORLIST_CLIP
 #include "src/control_flow/control_flow_scheduler.h"
 #endif
-#ifndef RUNTIME_PASS_CLIP
 #include "src/runtime/runtime_shape_fusion_pass.h"
-#endif
 
 namespace mindspore::lite {
 constexpr int kDefaultDeviceType = -1;
@@ -180,12 +178,7 @@ class Scheduler {
   int schema_version_ = SCHEMA_VERSION::SCHEMA_CUR;
   std::map<std::string, TypeId> *execution_plan_ = nullptr;
   const std::map<std::string, std::map<std::string, std::string>> *config_info_ = nullptr;
-#ifndef RUNTIME_PASS_CLIP
   std::shared_ptr<ShapeFusionPass> shape_fusion_pass_ = nullptr;
-#ifndef DELEGATE_CLIP
-  std::vector<Tensor *> shape_fusion_outputs_;
-#endif
-#endif
 };
 } // namespace mindspore::lite
@@ -1 +1 @@
-856848
+858546
@@ -1 +1 @@
-856848
+858546