!45023 [Lite] sync code check
Merge pull request !45023 from yangruoqi713/master
This commit is contained in:
commit
de4a5a18fa
|
@ -146,7 +146,7 @@ int MaxPoolGradGradCpuKernelMod::Resize(const BaseOperatorPtr &base_operator,
|
||||||
param_->output_channel_ = LongToInt(out_shapes_[kDim1]);
|
param_->output_channel_ = LongToInt(out_shapes_[kDim1]);
|
||||||
param_->output_h_ = LongToInt(out_shapes_[height_index_]);
|
param_->output_h_ = LongToInt(out_shapes_[height_index_]);
|
||||||
param_->output_w_ = LongToInt(out_shapes_[width_index_]);
|
param_->output_w_ = LongToInt(out_shapes_[width_index_]);
|
||||||
output_elements_ = std::accumulate(out_shapes_.begin(), out_shapes_.end(), 1, std::multiplies<size_t>());
|
output_elements_ = LongToSize(std::accumulate(out_shapes_.begin(), out_shapes_.end(), 1, std::multiplies<int64_t>()));
|
||||||
|
|
||||||
if (dim_ == kMaxPool3DGradGradDim) {
|
if (dim_ == kMaxPool3DGradGradDim) {
|
||||||
reinterpret_cast<Pooling3DParameter *>(param_)->input_d_ = LongToInt(in_shapes_[depth_index_]);
|
reinterpret_cast<Pooling3DParameter *>(param_)->input_d_ = LongToInt(in_shapes_[depth_index_]);
|
||||||
|
|
|
@ -82,14 +82,14 @@ int InitCalVec(size_t *in_strides, size_t *out_strides, size_t *pos, const size_
|
||||||
|
|
||||||
#define COPY_TASK_IMPL(type0, type1) \
|
#define COPY_TASK_IMPL(type0, type1) \
|
||||||
int CopyTask_Input_##type0##_Index_##type1( \
|
int CopyTask_Input_##type0##_Index_##type1( \
|
||||||
type0 *output, const type0 *input, const type1 *index, size_t cur_dim, size_t *pos, const int dim, \
|
type0 *output, const type0 *input, const type1 *index, size_t cur_dim, size_t *pos, const size_t dim, \
|
||||||
const size_t *output_shape, const size_t output_shape_size, const size_t *in_strides, const size_t *out_strides) { \
|
const size_t *output_shape, const size_t output_shape_size, const size_t *in_strides, const size_t *out_strides) { \
|
||||||
if (pos == NULL || out_strides == NULL || in_strides == NULL) { \
|
if (pos == NULL || out_strides == NULL || in_strides == NULL) { \
|
||||||
return NNACL_NULL_PTR; \
|
return NNACL_NULL_PTR; \
|
||||||
} \
|
} \
|
||||||
for (size_t i = 0; i < output_shape[cur_dim]; ++i) { \
|
for (size_t i = 0; i < output_shape[cur_dim]; ++i) { \
|
||||||
pos[cur_dim] = i; \
|
pos[cur_dim] = i; \
|
||||||
if (cur_dim == (int)output_shape_size - 1) { \
|
if (cur_dim == output_shape_size - 1) { \
|
||||||
size_t input_offset = 0; \
|
size_t input_offset = 0; \
|
||||||
size_t out_offset = 0; \
|
size_t out_offset = 0; \
|
||||||
for (size_t j = 0; j < output_shape_size; ++j) { \
|
for (size_t j = 0; j < output_shape_size; ++j) { \
|
||||||
|
|
|
@ -28,10 +28,10 @@ extern "C" {
|
||||||
GatherD_Input_##type0##_Index_##type1(output, input, index, input_shape, input_shape_size, output_shape, \
|
GatherD_Input_##type0##_Index_##type1(output, input, index, input_shape, input_shape_size, output_shape, \
|
||||||
output_shape_size, dim)
|
output_shape_size, dim)
|
||||||
|
|
||||||
#define GATHER_D_IMPL_DECLARATION(type0, type1) \
|
#define GATHER_D_IMPL_DECLARATION(type0, type1) \
|
||||||
int GatherD_Input_##type0##_Index_##type1(type0 *output, const type0 *input, type1 *index, \
|
int GatherD_Input_##type0##_Index_##type1( \
|
||||||
const size_t *input_shape, const size_t input_shape_size, \
|
type0 *output, const type0 *input, type1 *index, const size_t *input_shape, const size_t input_shape_size, \
|
||||||
const size_t *output_shape, const size_t output_shape_size, const int dim)
|
const size_t *output_shape, const size_t output_shape_size, const size_t dim)
|
||||||
|
|
||||||
GATHER_D_IMPL_DECLARATION(bool, int32_t);
|
GATHER_D_IMPL_DECLARATION(bool, int32_t);
|
||||||
GATHER_D_IMPL_DECLARATION(bool, int64_t);
|
GATHER_D_IMPL_DECLARATION(bool, int64_t);
|
||||||
|
|
|
@ -22,7 +22,7 @@
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static inline void ApproximateZerosLike(void *output, int data_size) {
|
static inline void ApproximateZerosLike(void *output, size_t data_size) {
|
||||||
(void)memset(output, 0, data_size);
|
(void)memset(output, 0, data_size);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,11 +18,11 @@
|
||||||
#include "nnacl/errorcode.h"
|
#include "nnacl/errorcode.h"
|
||||||
#include "nnacl/op_base.h"
|
#include "nnacl/op_base.h"
|
||||||
|
|
||||||
int InvertPermutation(const int *input, int *output, int num) {
|
int InvertPermutation(const int *input, int *output, size_t num) {
|
||||||
NNACL_CHECK_NULL_RETURN_ERR(input);
|
NNACL_CHECK_NULL_RETURN_ERR(input);
|
||||||
NNACL_CHECK_NULL_RETURN_ERR(output);
|
NNACL_CHECK_NULL_RETURN_ERR(output);
|
||||||
for (int i = 0; i < num; i++) {
|
for (size_t i = 0; i < num; i++) {
|
||||||
int index = input[i];
|
size_t index = (size_t)input[i];
|
||||||
if (index >= num) {
|
if (index >= num) {
|
||||||
return NNACL_ERR;
|
return NNACL_ERR;
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,10 +16,12 @@
|
||||||
#ifndef MINDSPORE_NNACL_INVERT_PERMUTATION_FP32_H_
|
#ifndef MINDSPORE_NNACL_INVERT_PERMUTATION_FP32_H_
|
||||||
#define MINDSPORE_NNACL_INVERT_PERMUTATION_FP32_H_
|
#define MINDSPORE_NNACL_INVERT_PERMUTATION_FP32_H_
|
||||||
|
|
||||||
|
#include <stddef.h>
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
int InvertPermutation(const int *input, int *output, int num);
|
int InvertPermutation(const int *input, int *output, size_t num);
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -155,7 +155,7 @@ void WinogradInputTransformOptStep(const float *input_data, float *trans_input,
|
||||||
// input transform
|
// input transform
|
||||||
const int block_tile = C12NUM;
|
const int block_tile = C12NUM;
|
||||||
int dst_ic8_offset = dst_plane_offset + ic * block_tile * input_unit * input_unit * channel_tile;
|
int dst_ic8_offset = dst_plane_offset + ic * block_tile * input_unit * input_unit * channel_tile;
|
||||||
size_t dst_step = input_unit * block_tile * channel_tile;
|
size_t dst_step = (size_t)(input_unit * block_tile * channel_tile);
|
||||||
float *trans_input_ptr = trans_input + dst_ic8_offset;
|
float *trans_input_ptr = trans_input + dst_ic8_offset;
|
||||||
func(tmp_data, trans_input_ptr, channel_tile, dst_step, block_tile * channel_tile);
|
func(tmp_data, trans_input_ptr, channel_tile, dst_step, block_tile * channel_tile);
|
||||||
}
|
}
|
||||||
|
|
|
@ -69,7 +69,6 @@ int exp_do_compute(void *param, int task_id, float lhs_scale, float rhs_scale) {
|
||||||
void *output_data = exp_stru->base.out[0].data_;
|
void *output_data = exp_stru->base.out[0].data_;
|
||||||
NNACL_CHECK_NULL_RETURN_ERR(output_data);
|
NNACL_CHECK_NULL_RETURN_ERR(output_data);
|
||||||
int ret = exp_stru->base.funcs->ExpFusion(input_data, output_data, exp_param, task_id);
|
int ret = exp_stru->base.funcs->ExpFusion(input_data, output_data, exp_param, task_id);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -163,7 +163,7 @@ class LiteTensorImpl : public MutableTensorImpl {
|
||||||
return lite_tensor_->format();
|
return lite_tensor_->format();
|
||||||
}
|
}
|
||||||
|
|
||||||
void SetFormat(mindspore::Format format) override {
|
void SetFormat(const mindspore::Format format) override {
|
||||||
if (lite_tensor_ == nullptr) {
|
if (lite_tensor_ == nullptr) {
|
||||||
MS_LOG(ERROR) << "Invalid tensor.";
|
MS_LOG(ERROR) << "Invalid tensor.";
|
||||||
return;
|
return;
|
||||||
|
|
|
@ -87,7 +87,7 @@ lite::Tensor *CreateConstTensor(const lite::Tensor *tensor, const std::vector<in
|
||||||
delete new_tensor;
|
delete new_tensor;
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
memcpy(new_tensor->data(), reinterpret_cast<void *>(new_tensor_data), new_tensor->Size());
|
(void)memcpy(new_tensor->data(), reinterpret_cast<void *>(new_tensor_data), new_tensor->Size());
|
||||||
return new_tensor;
|
return new_tensor;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -107,7 +107,7 @@ int ReduceBaseCPUKernel::Prepare() {
|
||||||
MS_CHECK_TRUE_MSG(axes_tensor != nullptr, RET_ERROR, "axes-tensor is a nullptr.");
|
MS_CHECK_TRUE_MSG(axes_tensor != nullptr, RET_ERROR, "axes-tensor is a nullptr.");
|
||||||
MS_CHECK_FALSE_MSG((axes_tensor->data_type() != kNumberTypeInt && axes_tensor->data_type() != kNumberTypeInt32),
|
MS_CHECK_FALSE_MSG((axes_tensor->data_type() != kNumberTypeInt && axes_tensor->data_type() != kNumberTypeInt32),
|
||||||
RET_ERROR, "The data type of axes tensor should be int32");
|
RET_ERROR, "The data type of axes tensor should be int32");
|
||||||
num_axes_ = axes_tensor->ElementsNum();
|
num_axes_ = static_cast<int>(axes_tensor->ElementsNum());
|
||||||
if (axes_tensor->data() != nullptr && (num_axes_ <= 0 || num_axes_ > MAX_SHAPE_SIZE)) {
|
if (axes_tensor->data() != nullptr && (num_axes_ <= 0 || num_axes_ > MAX_SHAPE_SIZE)) {
|
||||||
MS_LOG(ERROR) << "input axes invalid.";
|
MS_LOG(ERROR) << "input axes invalid.";
|
||||||
return RET_ERROR;
|
return RET_ERROR;
|
||||||
|
|
|
@ -348,7 +348,7 @@ OpParameter *AffineFp32CPUKernel::MatmulParameterCreate() {
|
||||||
matmul_param->a_transpose_ = origin_matmul->a_transpose_;
|
matmul_param->a_transpose_ = origin_matmul->a_transpose_;
|
||||||
matmul_param->has_bias_ = origin_matmul->has_bias_;
|
matmul_param->has_bias_ = origin_matmul->has_bias_;
|
||||||
matmul_param->act_type_ = origin_matmul->act_type_;
|
matmul_param->act_type_ = origin_matmul->act_type_;
|
||||||
matmul_param->op_parameter_.thread_num_ = this->context()->thread_num_;
|
matmul_param->op_parameter_.thread_num_ = op_parameter_->thread_num_;
|
||||||
return reinterpret_cast<OpParameter *>(matmul_param);
|
return reinterpret_cast<OpParameter *>(matmul_param);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -38,10 +38,7 @@ class BatchnormCPUKernel : public LiteKernel {
|
||||||
int SetupVirtualBatch(int virtual_batch_multiplier, int param) override;
|
int SetupVirtualBatch(int virtual_batch_multiplier, int param) override;
|
||||||
virtual int InitConstTensor();
|
virtual int InitConstTensor();
|
||||||
virtual int DoExecute(int task_id);
|
virtual int DoExecute(int task_id);
|
||||||
virtual int Batchnorm2Scale(const void *scale_data, const void *bias_data, const void *mean_data,
|
virtual int Batchnorm2Scale(const void *, const void *, const void *, const void *, float, int) { return RET_OK; }
|
||||||
const void *var_data, float eps, int kernel_num) {
|
|
||||||
return RET_OK;
|
|
||||||
}
|
|
||||||
virtual int set_momentum(float momentum);
|
virtual int set_momentum(float momentum);
|
||||||
virtual float get_momentum();
|
virtual float get_momentum();
|
||||||
virtual int RestoreDefaultMomentum();
|
virtual int RestoreDefaultMomentum();
|
||||||
|
|
|
@ -312,9 +312,8 @@ kernel::LiteKernel *ConvolutionDelegateCPUKernel::CpuConvFp32KernelSelect() {
|
||||||
op_parameter_ = nullptr;
|
op_parameter_ = nullptr;
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
kernel->set_name("act_" + name_);
|
||||||
}
|
}
|
||||||
|
|
||||||
kernel->set_name("act_" + name_);
|
|
||||||
return kernel;
|
return kernel;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -43,7 +43,7 @@ int FillCPUKernel::ReSize() {
|
||||||
}
|
}
|
||||||
auto output = out_tensors_.front();
|
auto output = out_tensors_.front();
|
||||||
CHECK_NULL_RETURN(output);
|
CHECK_NULL_RETURN(output);
|
||||||
data_size_ = output->ElementsNum();
|
data_size_ = static_cast<int>(output->ElementsNum());
|
||||||
thread_sz_count_ = MSMIN(thread_num_, data_size_);
|
thread_sz_count_ = MSMIN(thread_num_, data_size_);
|
||||||
if (thread_sz_count_ != 0) {
|
if (thread_sz_count_ != 0) {
|
||||||
thread_sz_stride_ = UP_DIV(data_size_, thread_sz_count_);
|
thread_sz_stride_ = UP_DIV(data_size_, thread_sz_count_);
|
||||||
|
|
|
@ -21,7 +21,7 @@ using mindspore::lite::RET_ERROR;
|
||||||
using mindspore::lite::RET_OK;
|
using mindspore::lite::RET_OK;
|
||||||
|
|
||||||
namespace mindspore::kernel {
|
namespace mindspore::kernel {
|
||||||
int GroupConvolutionFp32CPUKernel::Separate(int task_id) {
|
int GroupConvolutionFp32CPUKernel::Separate(const int &task_id) const {
|
||||||
auto plane_step = UP_DIV(in_plane_, in_thread_num_);
|
auto plane_step = UP_DIV(in_plane_, in_thread_num_);
|
||||||
MS_CHECK_INT_MUL_NOT_OVERFLOW(plane_step, task_id, RET_ERROR);
|
MS_CHECK_INT_MUL_NOT_OVERFLOW(plane_step, task_id, RET_ERROR);
|
||||||
auto begin_plane = plane_step * task_id;
|
auto begin_plane = plane_step * task_id;
|
||||||
|
@ -31,7 +31,7 @@ int GroupConvolutionFp32CPUKernel::Separate(int task_id) {
|
||||||
auto src_ptr = sub_in_src_ + begin_plane * ori_in_channel_;
|
auto src_ptr = sub_in_src_ + begin_plane * ori_in_channel_;
|
||||||
auto dst_ptr = sub_in_dst_ + begin_plane * sub_in_channel_;
|
auto dst_ptr = sub_in_dst_ + begin_plane * sub_in_channel_;
|
||||||
for (int i = begin_plane; i < end_plane; ++i) {
|
for (int i = begin_plane; i < end_plane; ++i) {
|
||||||
memcpy(dst_ptr, src_ptr, sub_in_channel_ * sizeof(float));
|
(void)memcpy(dst_ptr, src_ptr, sub_in_channel_ * sizeof(float));
|
||||||
src_ptr += ori_in_channel_;
|
src_ptr += ori_in_channel_;
|
||||||
dst_ptr += sub_in_channel_;
|
dst_ptr += sub_in_channel_;
|
||||||
}
|
}
|
||||||
|
@ -63,7 +63,7 @@ int GroupConvolutionFp32CPUKernel::SeparateInput(int group_id) {
|
||||||
return RET_OK;
|
return RET_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
int GroupConvolutionFp32CPUKernel::Concat(int task_id) {
|
int GroupConvolutionFp32CPUKernel::Concat(const int &task_id) const {
|
||||||
auto plane_step = UP_DIV(out_plane_, out_thread_num_);
|
auto plane_step = UP_DIV(out_plane_, out_thread_num_);
|
||||||
MS_CHECK_INT_MUL_NOT_OVERFLOW(plane_step, task_id, RET_ERROR);
|
MS_CHECK_INT_MUL_NOT_OVERFLOW(plane_step, task_id, RET_ERROR);
|
||||||
auto begin_plane = plane_step * task_id;
|
auto begin_plane = plane_step * task_id;
|
||||||
|
@ -73,7 +73,7 @@ int GroupConvolutionFp32CPUKernel::Concat(int task_id) {
|
||||||
auto src_ptr = sub_out_src_ + begin_plane * sub_out_channel_;
|
auto src_ptr = sub_out_src_ + begin_plane * sub_out_channel_;
|
||||||
auto dst_ptr = sub_out_dst_ + begin_plane * ori_out_channel_;
|
auto dst_ptr = sub_out_dst_ + begin_plane * ori_out_channel_;
|
||||||
for (int i = begin_plane; i < end_plane; ++i) {
|
for (int i = begin_plane; i < end_plane; ++i) {
|
||||||
memcpy(dst_ptr, src_ptr, sub_out_channel_ * sizeof(float));
|
(void)memcpy(dst_ptr, src_ptr, sub_out_channel_ * sizeof(float));
|
||||||
src_ptr += sub_out_channel_;
|
src_ptr += sub_out_channel_;
|
||||||
dst_ptr += ori_out_channel_;
|
dst_ptr += ori_out_channel_;
|
||||||
}
|
}
|
||||||
|
|
|
@ -37,8 +37,8 @@ class GroupConvolutionFp32CPUKernel : public GroupConvolutionBaseCPUKernel {
|
||||||
int SeparateInput(int group_id) override;
|
int SeparateInput(int group_id) override;
|
||||||
int PostConcat(int group_id) override;
|
int PostConcat(int group_id) override;
|
||||||
|
|
||||||
int Separate(int task_id);
|
int Separate(const int &task_id) const;
|
||||||
int Concat(int task_id);
|
int Concat(const int &task_id) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
float *sub_in_src_ = nullptr;
|
float *sub_in_src_ = nullptr;
|
||||||
|
|
|
@ -36,9 +36,9 @@ GroupnormCPUKernel::GroupnormCPUKernel(OpParameter *parameter, const std::vector
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t i = 0; i < in_tensors_.size(); i++) {
|
for (size_t i = 0; i < in_tensors_.size(); i++) {
|
||||||
Tensor2TensorC(in_tensors_.at(i), &(in_[i]));
|
(void)Tensor2TensorC(in_tensors_.at(i), &(in_[i]));
|
||||||
}
|
}
|
||||||
Tensor2TensorC(out_tensors_.at(0), &(out_[0]));
|
(void)Tensor2TensorC(out_tensors_.at(0), &(out_[0]));
|
||||||
}
|
}
|
||||||
|
|
||||||
GroupnormCPUKernel::~GroupnormCPUKernel() {
|
GroupnormCPUKernel::~GroupnormCPUKernel() {
|
||||||
|
|
|
@ -44,7 +44,6 @@ class InstanceNormCPUKernel : public LiteKernel {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
|
||||||
InstanceNormParameter *param_ = nullptr;
|
InstanceNormParameter *param_ = nullptr;
|
||||||
float *src_data_ = nullptr;
|
float *src_data_ = nullptr;
|
||||||
float *tmp_src_data_ = nullptr;
|
float *tmp_src_data_ = nullptr;
|
||||||
|
|
|
@ -59,7 +59,7 @@ int InvertPermutationCPUKernel::Run() {
|
||||||
auto output_ptr = reinterpret_cast<int32_t *>(out_tensor->data());
|
auto output_ptr = reinterpret_cast<int32_t *>(out_tensor->data());
|
||||||
CHECK_NULL_RETURN(input_ptr);
|
CHECK_NULL_RETURN(input_ptr);
|
||||||
CHECK_NULL_RETURN(output_ptr);
|
CHECK_NULL_RETURN(output_ptr);
|
||||||
auto ret = InvertPermutation(input_ptr, output_ptr, in_tensors_[0]->ElementsNum());
|
auto ret = InvertPermutation(input_ptr, output_ptr, static_cast<int>(in_tensors_[0]->ElementsNum()));
|
||||||
if (ret != NNACL_OK) {
|
if (ret != NNACL_OK) {
|
||||||
MS_LOG(ERROR) << "null pointer dereferencing.";
|
MS_LOG(ERROR) << "null pointer dereferencing.";
|
||||||
return RET_ERROR;
|
return RET_ERROR;
|
||||||
|
|
|
@ -40,10 +40,11 @@ int LayerNormCPUKernel::ReSize() {
|
||||||
auto input = in_tensors_.front();
|
auto input = in_tensors_.front();
|
||||||
CHECK_NULL_RETURN(input);
|
CHECK_NULL_RETURN(input);
|
||||||
auto shape = input->shape();
|
auto shape = input->shape();
|
||||||
param_->begin_norm_axis_ =
|
param_->begin_norm_axis_ = param_->begin_norm_axis_ >= 0 ? param_->begin_norm_axis_
|
||||||
param_->begin_norm_axis_ >= 0 ? param_->begin_norm_axis_ : param_->begin_norm_axis_ + shape.size();
|
: param_->begin_norm_axis_ + static_cast<int>(shape.size());
|
||||||
param_->begin_params_axis_ =
|
param_->begin_params_axis_ = param_->begin_params_axis_ >= 0
|
||||||
param_->begin_params_axis_ >= 0 ? param_->begin_params_axis_ : param_->begin_params_axis_ + shape.size();
|
? param_->begin_params_axis_
|
||||||
|
: param_->begin_params_axis_ + static_cast<int>(shape.size());
|
||||||
MS_CHECK_LT(param_->begin_norm_axis_, static_cast<int>(shape.size()), RET_ERROR);
|
MS_CHECK_LT(param_->begin_norm_axis_, static_cast<int>(shape.size()), RET_ERROR);
|
||||||
MS_CHECK_LT(param_->begin_params_axis_, static_cast<int>(shape.size()), RET_ERROR);
|
MS_CHECK_LT(param_->begin_params_axis_, static_cast<int>(shape.size()), RET_ERROR);
|
||||||
param_->norm_outer_size_ = 1;
|
param_->norm_outer_size_ = 1;
|
||||||
|
|
|
@ -21,10 +21,13 @@ using mindspore::lite::RET_ERROR;
|
||||||
using mindspore::lite::RET_OK;
|
using mindspore::lite::RET_OK;
|
||||||
|
|
||||||
namespace mindspore::kernel {
|
namespace mindspore::kernel {
|
||||||
int GroupConvolutionInt8CPUKernel::Separate(int task_id) {
|
int GroupConvolutionInt8CPUKernel::Separate(const int &task_id) const {
|
||||||
auto plane_step = UP_DIV(in_plane_, in_thread_num_);
|
auto plane_step = UP_DIV(in_plane_, in_thread_num_);
|
||||||
|
MS_CHECK_INT_MUL_NOT_OVERFLOW(plane_step, task_id, RET_ERROR);
|
||||||
auto begin_plane = plane_step * task_id;
|
auto begin_plane = plane_step * task_id;
|
||||||
auto end_plane = MSMIN(in_plane_, plane_step * (task_id + 1));
|
auto end_plane = MSMIN(in_plane_, plane_step * (task_id + 1));
|
||||||
|
MS_CHECK_INT_MUL_NOT_OVERFLOW(begin_plane, ori_in_channel_, RET_ERROR);
|
||||||
|
MS_CHECK_INT_MUL_NOT_OVERFLOW(begin_plane, sub_in_channel_, RET_ERROR);
|
||||||
auto src_ptr = sub_in_src_ + begin_plane * ori_in_channel_;
|
auto src_ptr = sub_in_src_ + begin_plane * ori_in_channel_;
|
||||||
auto dst_ptr = sub_in_dst_ + begin_plane * sub_in_channel_;
|
auto dst_ptr = sub_in_dst_ + begin_plane * sub_in_channel_;
|
||||||
for (int i = begin_plane; i < end_plane; ++i) {
|
for (int i = begin_plane; i < end_plane; ++i) {
|
||||||
|
@ -59,10 +62,13 @@ int GroupConvolutionInt8CPUKernel::SeparateInput(int group_id) {
|
||||||
return RET_OK;
|
return RET_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
int GroupConvolutionInt8CPUKernel::Concat(int task_id) {
|
int GroupConvolutionInt8CPUKernel::Concat(const int &task_id) const {
|
||||||
auto plane_step = UP_DIV(out_plane_, out_thread_num_);
|
auto plane_step = UP_DIV(out_plane_, out_thread_num_);
|
||||||
|
MS_CHECK_INT_MUL_NOT_OVERFLOW(plane_step, task_id, RET_ERROR);
|
||||||
auto begin_plane = plane_step * task_id;
|
auto begin_plane = plane_step * task_id;
|
||||||
auto end_plane = MSMIN(out_plane_, plane_step * (task_id + 1));
|
auto end_plane = MSMIN(out_plane_, plane_step * (task_id + 1));
|
||||||
|
MS_CHECK_INT_MUL_NOT_OVERFLOW(begin_plane, sub_out_channel_, RET_ERROR);
|
||||||
|
MS_CHECK_INT_MUL_NOT_OVERFLOW(begin_plane, ori_out_channel_, RET_ERROR);
|
||||||
auto src_ptr = sub_out_src_ + begin_plane * sub_out_channel_;
|
auto src_ptr = sub_out_src_ + begin_plane * sub_out_channel_;
|
||||||
auto dst_ptr = sub_out_dst_ + begin_plane * ori_out_channel_;
|
auto dst_ptr = sub_out_dst_ + begin_plane * ori_out_channel_;
|
||||||
for (int i = begin_plane; i < end_plane; ++i) {
|
for (int i = begin_plane; i < end_plane; ++i) {
|
||||||
|
|
|
@ -37,8 +37,8 @@ class GroupConvolutionInt8CPUKernel : public GroupConvolutionBaseCPUKernel {
|
||||||
int SeparateInput(int group_id) override;
|
int SeparateInput(int group_id) override;
|
||||||
int PostConcat(int group_id) override;
|
int PostConcat(int group_id) override;
|
||||||
|
|
||||||
int Separate(int task_id);
|
int Separate(const int &task_id) const;
|
||||||
int Concat(int task_id);
|
int Concat(const int &task_id) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
int8_t *sub_in_src_ = nullptr;
|
int8_t *sub_in_src_ = nullptr;
|
||||||
|
|
|
@ -91,7 +91,7 @@ int KernelExec::DoExecute() {
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
void KernelExec::RepalceKernel(std::shared_ptr<Kernel> kernel) {
|
void KernelExec::RepalceKernel(const std::shared_ptr<Kernel> kernel) {
|
||||||
if (desc_.provider == kBuiltin) {
|
if (desc_.provider == kBuiltin) {
|
||||||
std::static_pointer_cast<LiteKernel>(kernel_)->set_parameter(nullptr); // set nullptr, don't release op_parameter
|
std::static_pointer_cast<LiteKernel>(kernel_)->set_parameter(nullptr); // set nullptr, don't release op_parameter
|
||||||
kernel_.reset();
|
kernel_.reset();
|
||||||
|
|
|
@ -234,7 +234,7 @@ class KernelExec {
|
||||||
MS_ASSERT(index < kernel_->inputs().size());
|
MS_ASSERT(index < kernel_->inputs().size());
|
||||||
auto impl = std::make_shared<mindspore::LiteTensorImpl>(in_tensor);
|
auto impl = std::make_shared<mindspore::LiteTensorImpl>(in_tensor);
|
||||||
auto tensor_in = mindspore::MSTensor(impl);
|
auto tensor_in = mindspore::MSTensor(impl);
|
||||||
kernel_->set_input(tensor_in, index);
|
kernel_->set_input(tensor_in, static_cast<int>(index));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -261,7 +261,7 @@ class KernelExec {
|
||||||
MS_ASSERT(index < kernel_->outputs().size());
|
MS_ASSERT(index < kernel_->outputs().size());
|
||||||
auto impl = std::make_shared<mindspore::LiteTensorImpl>(out_tensor);
|
auto impl = std::make_shared<mindspore::LiteTensorImpl>(out_tensor);
|
||||||
auto tensor_out = mindspore::MSTensor(impl);
|
auto tensor_out = mindspore::MSTensor(impl);
|
||||||
kernel_->set_output(tensor_out, index);
|
kernel_->set_output(tensor_out, static_cast<int>(index));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -317,8 +317,8 @@ class KernelExec {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t FindInTensorIndex(lite::Tensor *tensor) {
|
size_t FindInTensorIndex(const lite::Tensor *tensor) {
|
||||||
int index = 0;
|
size_t index = 0;
|
||||||
for (size_t i = 0; i < in_tensors().size(); i++) {
|
for (size_t i = 0; i < in_tensors().size(); i++) {
|
||||||
if (tensor == in_tensors().at(i)) {
|
if (tensor == in_tensors().at(i)) {
|
||||||
index = i;
|
index = i;
|
||||||
|
@ -328,8 +328,8 @@ class KernelExec {
|
||||||
return index;
|
return index;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t FindOutTensorIndex(lite::Tensor *tensor) {
|
size_t FindOutTensorIndex(const lite::Tensor *tensor) {
|
||||||
int index = 0;
|
size_t index = 0;
|
||||||
for (size_t i = 0; i < out_tensors().size(); i++) {
|
for (size_t i = 0; i < out_tensors().size(); i++) {
|
||||||
if (tensor == out_tensors().at(i)) {
|
if (tensor == out_tensors().at(i)) {
|
||||||
index = i;
|
index = i;
|
||||||
|
@ -339,9 +339,9 @@ class KernelExec {
|
||||||
return index;
|
return index;
|
||||||
}
|
}
|
||||||
|
|
||||||
void RemoveInKernel(KernelExec *kernel) { lite::VectorErase(&(this->in_kernels_), kernel); }
|
void RemoveInKernel(KernelExec *kernel) { (void)lite::VectorErase(&(this->in_kernels_), kernel); }
|
||||||
|
|
||||||
void RemoveOutKernel(KernelExec *kernel) { lite::VectorErase(&(this->out_kernels_), kernel); }
|
void RemoveOutKernel(KernelExec *kernel) { (void)lite::VectorErase(&(this->out_kernels_), kernel); }
|
||||||
|
|
||||||
void set_in_kernels(const std::vector<KernelExec *> &kernel) { this->in_kernels_ = kernel; }
|
void set_in_kernels(const std::vector<KernelExec *> &kernel) { this->in_kernels_ = kernel; }
|
||||||
|
|
||||||
|
|
|
@ -266,7 +266,7 @@ void KernelExecUtil::FindAllInoutKernelsInSubgraphKernel(const std::vector<Kerne
|
||||||
KernelExecUtil::FindAllInoutKernels(all_kernels);
|
KernelExecUtil::FindAllInoutKernels(all_kernels);
|
||||||
}
|
}
|
||||||
|
|
||||||
KernelExec *KernelExecUtil::FindInKernelForInTensor(KernelExec *kernel, lite::Tensor *tensor) {
|
KernelExec *KernelExecUtil::FindInKernelForInTensor(const KernelExec *kernel, lite::Tensor *tensor) {
|
||||||
for (auto in_kernel : kernel->in_kernels()) {
|
for (auto in_kernel : kernel->in_kernels()) {
|
||||||
if (lite::IsContain(in_kernel->out_tensors(), tensor)) {
|
if (lite::IsContain(in_kernel->out_tensors(), tensor)) {
|
||||||
return in_kernel;
|
return in_kernel;
|
||||||
|
@ -275,7 +275,7 @@ KernelExec *KernelExecUtil::FindInKernelForInTensor(KernelExec *kernel, lite::Te
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<KernelExec *> KernelExecUtil::FindOutKernelsForOutTensor(KernelExec *kernel, lite::Tensor *tensor) {
|
std::vector<KernelExec *> KernelExecUtil::FindOutKernelsForOutTensor(const KernelExec *kernel, lite::Tensor *tensor) {
|
||||||
std::vector<KernelExec *> out_kernels;
|
std::vector<KernelExec *> out_kernels;
|
||||||
for (auto out_kernel : kernel->out_kernels()) {
|
for (auto out_kernel : kernel->out_kernels()) {
|
||||||
if (lite::IsContain(out_kernel->in_tensors(), tensor)) {
|
if (lite::IsContain(out_kernel->in_tensors(), tensor)) {
|
||||||
|
@ -285,7 +285,7 @@ std::vector<KernelExec *> KernelExecUtil::FindOutKernelsForOutTensor(KernelExec
|
||||||
return out_kernels;
|
return out_kernels;
|
||||||
}
|
}
|
||||||
|
|
||||||
int KernelExecUtil::SetKernelTensorDataType(kernel::KernelExec *kernel) {
|
int KernelExecUtil::SetKernelTensorDataType(const kernel::KernelExec *kernel) {
|
||||||
CHECK_NULL_RETURN(kernel);
|
CHECK_NULL_RETURN(kernel);
|
||||||
if (kernel->desc().arch != kernel::KERNEL_ARCH::kCPU) {
|
if (kernel->desc().arch != kernel::KERNEL_ARCH::kCPU) {
|
||||||
return RET_OK;
|
return RET_OK;
|
||||||
|
|
|
@ -45,9 +45,9 @@ class KernelExecUtil {
|
||||||
// find in_kernels_ and out_kernels of kernel, sub_graph and nodes_ in sub_graph
|
// find in_kernels_ and out_kernels of kernel, sub_graph and nodes_ in sub_graph
|
||||||
static void FindAllInoutKernels(const std::vector<KernelExec *> &kernels);
|
static void FindAllInoutKernels(const std::vector<KernelExec *> &kernels);
|
||||||
static void FindAllInoutKernelsInSubgraphKernel(const std::vector<KernelExec *> &kernels);
|
static void FindAllInoutKernelsInSubgraphKernel(const std::vector<KernelExec *> &kernels);
|
||||||
static KernelExec *FindInKernelForInTensor(KernelExec *kernel, lite::Tensor *tensor);
|
static KernelExec *FindInKernelForInTensor(const KernelExec *kernel, lite::Tensor *tensor);
|
||||||
static std::vector<KernelExec *> FindOutKernelsForOutTensor(KernelExec *kernel, lite::Tensor *tensor);
|
static std::vector<KernelExec *> FindOutKernelsForOutTensor(const KernelExec *kernel, lite::Tensor *tensor);
|
||||||
static int SetKernelTensorDataType(kernel::KernelExec *kernel);
|
static int SetKernelTensorDataType(const kernel::KernelExec *kernel);
|
||||||
static SubGraphKernel *CreateSubGraphKernel(const std::vector<KernelExec *> &kernels,
|
static SubGraphKernel *CreateSubGraphKernel(const std::vector<KernelExec *> &kernels,
|
||||||
const std::vector<lite::Tensor *> *in_tensors,
|
const std::vector<lite::Tensor *> *in_tensors,
|
||||||
const std::vector<lite::Tensor *> *out_tensors, SubGraphType type,
|
const std::vector<lite::Tensor *> *out_tensors, SubGraphType type,
|
||||||
|
|
|
@ -164,11 +164,11 @@ class LiteKernel : public Abstractkernel {
|
||||||
return mindspore::lite::RET_OK;
|
return mindspore::lite::RET_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual int SetupVirtualBatch(int virtual_batch_multiplier, int param) { return mindspore::lite::RET_OK; }
|
virtual int SetupVirtualBatch(int, int) { return mindspore::lite::RET_OK; }
|
||||||
|
|
||||||
bool IsEval() const override { return !this->train_mode_; }
|
bool IsEval() const override { return !this->train_mode_; }
|
||||||
|
|
||||||
void SetTrainable(bool trainable = true) override { this->trainable_ = trainable; }
|
void SetTrainable(bool trainable) override { this->trainable_ = trainable; }
|
||||||
|
|
||||||
bool IsTrainable() const override { return this->trainable_; }
|
bool IsTrainable() const override { return this->trainable_; }
|
||||||
|
|
||||||
|
@ -186,7 +186,6 @@ class LiteKernel : public Abstractkernel {
|
||||||
workspace_ = ws;
|
workspace_ = ws;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
const lite::InnerContext *context() const { return this->ms_context_; }
|
|
||||||
bool ws_allocated_ = false;
|
bool ws_allocated_ = false;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|
|
@ -25,16 +25,12 @@ int TransFullyFusion(kernel::SubGraphKernel *subgraph, kernel::KernelExec *trans
|
||||||
CHECK_NULL_RETURN(trans_kernel0);
|
CHECK_NULL_RETURN(trans_kernel0);
|
||||||
CHECK_NULL_RETURN(trans_kernel1);
|
CHECK_NULL_RETURN(trans_kernel1);
|
||||||
auto in_tensor = trans_kernel0->in_tensors().at(0);
|
auto in_tensor = trans_kernel0->in_tensors().at(0);
|
||||||
|
|
||||||
auto out_tensor = trans_kernel1->out_tensors().at(0);
|
auto out_tensor = trans_kernel1->out_tensors().at(0);
|
||||||
auto in_kernel = kernel::KernelExecUtil::FindInKernelForInTensor(trans_kernel0, in_tensor);
|
auto in_kernel = kernel::KernelExecUtil::FindInKernelForInTensor(trans_kernel0, in_tensor);
|
||||||
auto out_kernels = kernel::KernelExecUtil::FindOutKernelsForOutTensor(trans_kernel1, out_tensor);
|
auto out_kernels = kernel::KernelExecUtil::FindOutKernelsForOutTensor(trans_kernel1, out_tensor);
|
||||||
auto ret = subgraph->UpdateInOutKernels(in_kernel, out_kernels, trans_kernel0, trans_kernel1);
|
subgraph->UpdateInOutKernels(in_kernel, out_kernels, trans_kernel0, trans_kernel1);
|
||||||
if (ret != RET_OK) {
|
auto ret = subgraph->UpdateInOutTensors(in_kernel, out_kernels, in_tensor, out_tensor, true);
|
||||||
MS_LOG(ERROR) << "Update kernel link failed when fusing kernel " << trans_kernel0->name() << " and "
|
|
||||||
<< trans_kernel1->name();
|
|
||||||
return RET_ERROR;
|
|
||||||
}
|
|
||||||
ret = subgraph->UpdateInOutTensors(in_kernel, out_kernels, in_tensor, out_tensor, true);
|
|
||||||
if (ret != RET_OK) {
|
if (ret != RET_OK) {
|
||||||
MS_LOG(ERROR) << "Update tensor failed when fusing kernel " << trans_kernel0->name() << " and "
|
MS_LOG(ERROR) << "Update tensor failed when fusing kernel " << trans_kernel0->name() << " and "
|
||||||
<< trans_kernel1->name();
|
<< trans_kernel1->name();
|
||||||
|
@ -59,12 +55,7 @@ int TransHeadTailFusion(kernel::SubGraphKernel *subgraph, kernel::KernelExec *tr
|
||||||
auto out_tensor = trans_kernel1->out_tensors().at(0);
|
auto out_tensor = trans_kernel1->out_tensors().at(0);
|
||||||
auto in_kernel = kernel::KernelExecUtil::FindInKernelForInTensor(trans_kernel0, in_tensor);
|
auto in_kernel = kernel::KernelExecUtil::FindInKernelForInTensor(trans_kernel0, in_tensor);
|
||||||
auto out_kernels = kernel::KernelExecUtil::FindOutKernelsForOutTensor(trans_kernel1, out_tensor);
|
auto out_kernels = kernel::KernelExecUtil::FindOutKernelsForOutTensor(trans_kernel1, out_tensor);
|
||||||
auto ret = subgraph->UpdateInOutKernels(in_kernel, out_kernels, trans_kernel0, trans_kernel1);
|
subgraph->UpdateInOutKernels(in_kernel, out_kernels, trans_kernel0, trans_kernel1);
|
||||||
if (ret != RET_OK) {
|
|
||||||
MS_LOG(ERROR) << "Update kernel link failed when fusing kernel " << trans_kernel0->name() << " and "
|
|
||||||
<< trans_kernel1->name();
|
|
||||||
return RET_ERROR;
|
|
||||||
}
|
|
||||||
// new trans kernel: src_format -> dst_format
|
// new trans kernel: src_format -> dst_format
|
||||||
auto trans_name = trans_kernel0->name() + "_and_" + trans_kernel1->name() + "_fusion";
|
auto trans_name = trans_kernel0->name() + "_and_" + trans_kernel1->name() + "_fusion";
|
||||||
auto kernel = CreateFormatTranspose(in_tensor, out_tensor, trans_info, trans_name, ctx, desc);
|
auto kernel = CreateFormatTranspose(in_tensor, out_tensor, trans_info, trans_name, ctx, desc);
|
||||||
|
@ -98,7 +89,7 @@ int DecreaseTransposeAlgo::TransTransFusion(kernel::SubGraphKernel *subgraph) {
|
||||||
while (kernel_iter != kernels->end()) {
|
while (kernel_iter != kernels->end()) {
|
||||||
auto &kernel = *kernel_iter;
|
auto &kernel = *kernel_iter;
|
||||||
CHECK_NULL_RETURN(kernel);
|
CHECK_NULL_RETURN(kernel);
|
||||||
kernel_iter++;
|
(void)kernel_iter++;
|
||||||
|
|
||||||
if (kernel->in_kernels().size() == 0 || !IsContain(subgraph->nodes(), kernel->in_kernels().at(0))) {
|
if (kernel->in_kernels().size() == 0 || !IsContain(subgraph->nodes(), kernel->in_kernels().at(0))) {
|
||||||
continue;
|
continue;
|
||||||
|
@ -227,11 +218,7 @@ int DoPreFusion(kernel::SubGraphKernel *subgraph, kernel::KernelExec *kernel, st
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
auto pre_in_kernel = kernel::KernelExecUtil::FindInKernelForInTensor(in_kernel, in_kernel->in_tensors().at(0));
|
auto pre_in_kernel = kernel::KernelExecUtil::FindInKernelForInTensor(in_kernel, in_kernel->in_tensors().at(0));
|
||||||
ret = subgraph->UpdateInOutKernels(pre_in_kernel, {kernel}, in_kernel, in_kernel);
|
subgraph->UpdateInOutKernels(pre_in_kernel, {kernel}, in_kernel, in_kernel);
|
||||||
if (ret != RET_OK) {
|
|
||||||
MS_LOG(ERROR) << "Update kernel link failed when removing kernel " << in_kernel->name();
|
|
||||||
return RET_ERROR;
|
|
||||||
}
|
|
||||||
ret = subgraph->UpdateInOutTensors(pre_in_kernel, {kernel}, in_kernel->in_tensors().at(0), in_tensor, true);
|
ret = subgraph->UpdateInOutTensors(pre_in_kernel, {kernel}, in_kernel->in_tensors().at(0), in_tensor, true);
|
||||||
if (ret != RET_OK) {
|
if (ret != RET_OK) {
|
||||||
MS_LOG(ERROR) << "Update tensor failed when removing kernel " << in_kernel->name();
|
MS_LOG(ERROR) << "Update tensor failed when removing kernel " << in_kernel->name();
|
||||||
|
@ -248,7 +235,7 @@ int DoPreFusion(kernel::SubGraphKernel *subgraph, kernel::KernelExec *kernel, st
|
||||||
return RET_OK;
|
return RET_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
int DoPostFusion(kernel::SubGraphKernel *subgraph, kernel::KernelExec *kernel, std::vector<Tensor *> *all_tensors,
|
int DoPostFusion(kernel::SubGraphKernel *subgraph, const kernel::KernelExec *kernel, std::vector<Tensor *> *all_tensors,
|
||||||
const TransInfoPair &post_trans) {
|
const TransInfoPair &post_trans) {
|
||||||
for (size_t i = 0; i < kernel->out_tensors().size(); i++) {
|
for (size_t i = 0; i < kernel->out_tensors().size(); i++) {
|
||||||
auto tensor = kernel->out_tensors().at(i);
|
auto tensor = kernel->out_tensors().at(i);
|
||||||
|
@ -259,13 +246,13 @@ int DoPostFusion(kernel::SubGraphKernel *subgraph, kernel::KernelExec *kernel, s
|
||||||
TransInfoPair out_kernel_trans;
|
TransInfoPair out_kernel_trans;
|
||||||
auto ret = GetTransposeInfo(out_kernel, &out_kernel_trans);
|
auto ret = GetTransposeInfo(out_kernel, &out_kernel_trans);
|
||||||
if (ret == RET_OK && IsSameTranspose(post_trans, out_kernel_trans)) {
|
if (ret == RET_OK && IsSameTranspose(post_trans, out_kernel_trans)) {
|
||||||
to_deletes.emplace_back(out_kernel);
|
(void)to_deletes.emplace_back(out_kernel);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
auto in_tensor_of_out_kernel_idx = out_kernel->FindInTensorIndex(tensor);
|
auto in_tensor_of_out_kernel_idx = out_kernel->FindInTensorIndex(tensor);
|
||||||
ret = InsertPreTranspose(subgraph, out_kernel, all_tensors,
|
ret =
|
||||||
TransInfoPair(post_trans.dst_format_, post_trans.src_format_),
|
InsertPreTranspose(subgraph, out_kernel, all_tensors,
|
||||||
static_cast<int>(in_tensor_of_out_kernel_idx));
|
TransInfoPair(post_trans.dst_format_, post_trans.src_format_), in_tensor_of_out_kernel_idx);
|
||||||
if (ret != RET_OK) {
|
if (ret != RET_OK) {
|
||||||
MS_LOG(ERROR) << "Insert pre transpose kernel for op: " << out_kernel->name() << " input tensor "
|
MS_LOG(ERROR) << "Insert pre transpose kernel for op: " << out_kernel->name() << " input tensor "
|
||||||
<< in_tensor_of_out_kernel_idx << " failed.";
|
<< in_tensor_of_out_kernel_idx << " failed.";
|
||||||
|
@ -293,7 +280,7 @@ int DecreaseTransposeAlgo::DecreaseTransposeForSingleKernel(kernel::SubGraphKern
|
||||||
TransInfoPair pre_trans;
|
TransInfoPair pre_trans;
|
||||||
TransInfoPair post_trans;
|
TransInfoPair post_trans;
|
||||||
if (!transpose_strategy_.CheckFusion(kernel, &pre_trans, &post_trans)) {
|
if (!transpose_strategy_.CheckFusion(kernel, &pre_trans, &post_trans)) {
|
||||||
kernel_iter++;
|
(void)kernel_iter++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
auto ret = transpose_strategy_.ChangeKernelAxis(kernel, post_trans);
|
auto ret = transpose_strategy_.ChangeKernelAxis(kernel, post_trans);
|
||||||
|
@ -314,7 +301,7 @@ int DecreaseTransposeAlgo::DecreaseTransposeForSingleKernel(kernel::SubGraphKern
|
||||||
return RET_ERROR;
|
return RET_ERROR;
|
||||||
}
|
}
|
||||||
kernel_iter = find(kernels->begin(), kernels->end(), kernel);
|
kernel_iter = find(kernels->begin(), kernels->end(), kernel);
|
||||||
kernel_iter++;
|
(void)kernel_iter++;
|
||||||
}
|
}
|
||||||
return RET_OK;
|
return RET_OK;
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,7 +20,7 @@
|
||||||
#include "src/litert/kernel_exec_util.h"
|
#include "src/litert/kernel_exec_util.h"
|
||||||
|
|
||||||
namespace mindspore::lite::pass {
|
namespace mindspore::lite::pass {
|
||||||
int DeleteIsolatedKernel::Run(kernel::SubGraphKernel *subgraph, std::vector<Tensor *> *tensors) {
|
int DeleteIsolatedKernel::Run(kernel::SubGraphKernel *subgraph, std::vector<Tensor *> *) {
|
||||||
subgraph->SetInNodes(kernel::KernelExecUtil::SubgraphInputNodes(subgraph->nodes()));
|
subgraph->SetInNodes(kernel::KernelExecUtil::SubgraphInputNodes(subgraph->nodes()));
|
||||||
|
|
||||||
std::set<kernel::KernelExec *> visited; // record the kernel that will be executed
|
std::set<kernel::KernelExec *> visited; // record the kernel that will be executed
|
||||||
|
@ -28,7 +28,7 @@ int DeleteIsolatedKernel::Run(kernel::SubGraphKernel *subgraph, std::vector<Tens
|
||||||
|
|
||||||
for (auto in_kernel : subgraph->in_nodes()) {
|
for (auto in_kernel : subgraph->in_nodes()) {
|
||||||
kernel_queue.push(in_kernel);
|
kernel_queue.push(in_kernel);
|
||||||
visited.insert(in_kernel);
|
(void)visited.insert(in_kernel);
|
||||||
}
|
}
|
||||||
while (!kernel_queue.empty()) {
|
while (!kernel_queue.empty()) {
|
||||||
auto kernel = kernel_queue.front();
|
auto kernel = kernel_queue.front();
|
||||||
|
@ -39,7 +39,7 @@ int DeleteIsolatedKernel::Run(kernel::SubGraphKernel *subgraph, std::vector<Tens
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
kernel_queue.push(out_kernel);
|
kernel_queue.push(out_kernel);
|
||||||
visited.insert(out_kernel);
|
(void)visited.insert(out_kernel);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -25,7 +25,7 @@ class DeleteIsolatedKernel : public RuntimePass {
|
||||||
public:
|
public:
|
||||||
DeleteIsolatedKernel() {}
|
DeleteIsolatedKernel() {}
|
||||||
~DeleteIsolatedKernel() override = default;
|
~DeleteIsolatedKernel() override = default;
|
||||||
int Run(kernel::SubGraphKernel *subgraph, std::vector<Tensor *> *tensors = nullptr) override;
|
int Run(kernel::SubGraphKernel *subgraph, std::vector<Tensor *> *) override;
|
||||||
};
|
};
|
||||||
} // namespace mindspore::lite::pass
|
} // namespace mindspore::lite::pass
|
||||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_PASS_DELETE_ISOLATED_KERNEL_H_
|
#endif // MINDSPORE_LITE_SRC_RUNTIME_PASS_DELETE_ISOLATED_KERNEL_H_
|
||||||
|
|
|
@ -19,7 +19,7 @@
|
||||||
#include "src/litert/kernel_exec_util.h"
|
#include "src/litert/kernel_exec_util.h"
|
||||||
|
|
||||||
namespace mindspore::lite::pass {
|
namespace mindspore::lite::pass {
|
||||||
int Infershape::Run(kernel::SubGraphKernel *subgraph, std::vector<Tensor *> *tensors) {
|
int Infershape::Run(kernel::SubGraphKernel *subgraph, std::vector<Tensor *> *) {
|
||||||
auto kernels = &(subgraph->nodes());
|
auto kernels = &(subgraph->nodes());
|
||||||
for (const auto &kernel : *kernels) {
|
for (const auto &kernel : *kernels) {
|
||||||
CHECK_NULL_RETURN(kernel);
|
CHECK_NULL_RETURN(kernel);
|
||||||
|
|
|
@ -25,7 +25,7 @@ class Infershape : public RuntimePass {
|
||||||
public:
|
public:
|
||||||
Infershape() {}
|
Infershape() {}
|
||||||
~Infershape() override = default;
|
~Infershape() override = default;
|
||||||
int Run(kernel::SubGraphKernel *subgraph, std::vector<Tensor *> *tensors = nullptr) override;
|
int Run(kernel::SubGraphKernel *subgraph, std::vector<Tensor *> *) override;
|
||||||
};
|
};
|
||||||
} // namespace mindspore::lite::pass
|
} // namespace mindspore::lite::pass
|
||||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_PASS_INFERSHAPE_PASS_H_
|
#endif // MINDSPORE_LITE_SRC_RUNTIME_PASS_INFERSHAPE_PASS_H_
|
||||||
|
|
|
@ -61,8 +61,8 @@ kernel::KernelExec *CreateFormatTranspose(Tensor *input, Tensor *output, const T
|
||||||
MS_LOG(ERROR) << "Malloc FormatTransposeParameter failed.";
|
MS_LOG(ERROR) << "Malloc FormatTransposeParameter failed.";
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
memset(param, 0, sizeof(FormatTransposeParameter));
|
(void)memset(param, 0, sizeof(FormatTransposeParameter));
|
||||||
param->op_parameter_.type_ = schema::PrimitiveType_FormatTranspose;
|
param->op_parameter_.type_ = static_cast<int>(schema::PrimitiveType_FormatTranspose);
|
||||||
param->src_format_ = trans_info.src_format_;
|
param->src_format_ = trans_info.src_format_;
|
||||||
param->dst_format_ = trans_info.dst_format_;
|
param->dst_format_ = trans_info.dst_format_;
|
||||||
kernel::KernelKey format_transpose_key = desc;
|
kernel::KernelKey format_transpose_key = desc;
|
||||||
|
@ -80,7 +80,7 @@ kernel::KernelExec *CreateFormatTranspose(Tensor *input, Tensor *output, const T
|
||||||
return kernel;
|
return kernel;
|
||||||
}
|
}
|
||||||
|
|
||||||
void SetShape(Tensor *src_tensor, Tensor *dst_tensor) {
|
void SetShape(const Tensor *src_tensor, Tensor *dst_tensor) {
|
||||||
auto shape = src_tensor->shape();
|
auto shape = src_tensor->shape();
|
||||||
auto invalid_shape = {-1};
|
auto invalid_shape = {-1};
|
||||||
if (shape.size() != DIMENSION_4D) {
|
if (shape.size() != DIMENSION_4D) {
|
||||||
|
@ -105,7 +105,7 @@ void SetShape(Tensor *src_tensor, Tensor *dst_tensor) {
|
||||||
}
|
}
|
||||||
|
|
||||||
int InsertPreTranspose(kernel::SubGraphKernel *subgraph, kernel::KernelExec *kernel, std::vector<Tensor *> *all_tensors,
|
int InsertPreTranspose(kernel::SubGraphKernel *subgraph, kernel::KernelExec *kernel, std::vector<Tensor *> *all_tensors,
|
||||||
const TransInfoPair &trans_info, const int &index) {
|
const TransInfoPair &trans_info, const size_t &index) {
|
||||||
auto trans_name = kernel->name() + "_pre_" + std::to_string(index);
|
auto trans_name = kernel->name() + "_pre_" + std::to_string(index);
|
||||||
auto in_tensor = kernel->in_tensors().at(index);
|
auto in_tensor = kernel->in_tensors().at(index);
|
||||||
auto out_tensor = new (std::nothrow) Tensor(in_tensor->data_type(), {}, (Format)trans_info.dst_format_);
|
auto out_tensor = new (std::nothrow) Tensor(in_tensor->data_type(), {}, (Format)trans_info.dst_format_);
|
||||||
|
@ -126,7 +126,7 @@ int InsertPreTranspose(kernel::SubGraphKernel *subgraph, kernel::KernelExec *ker
|
||||||
}
|
}
|
||||||
|
|
||||||
int InsertPostTranspose(kernel::SubGraphKernel *subgraph, kernel::KernelExec *kernel,
|
int InsertPostTranspose(kernel::SubGraphKernel *subgraph, kernel::KernelExec *kernel,
|
||||||
std::vector<Tensor *> *all_tensors, const TransInfoPair &trans_info, const int &index) {
|
std::vector<Tensor *> *all_tensors, const TransInfoPair &trans_info, const size_t &index) {
|
||||||
auto trans_name = kernel->name() + "_post_" + std::to_string(index);
|
auto trans_name = kernel->name() + "_post_" + std::to_string(index);
|
||||||
|
|
||||||
auto out_tensor = kernel->out_tensors().at(index);
|
auto out_tensor = kernel->out_tensors().at(index);
|
||||||
|
|
|
@ -41,13 +41,13 @@ kernel::KernelExec *CreateFormatTranspose(Tensor *input, Tensor *output, const T
|
||||||
const std::string &name, const lite::InnerContext *ctx,
|
const std::string &name, const lite::InnerContext *ctx,
|
||||||
const kernel::KernelKey &desc);
|
const kernel::KernelKey &desc);
|
||||||
|
|
||||||
void SetShape(Tensor *src_tensor, Tensor *dst_tensor);
|
void SetShape(const Tensor *src_tensor, Tensor *dst_tensor);
|
||||||
|
|
||||||
int InsertPreTranspose(kernel::SubGraphKernel *subgraph, kernel::KernelExec *kernel, std::vector<Tensor *> *all_tensors,
|
int InsertPreTranspose(kernel::SubGraphKernel *subgraph, kernel::KernelExec *kernel, std::vector<Tensor *> *all_tensors,
|
||||||
const TransInfoPair &trans_info, const int &index);
|
const TransInfoPair &trans_info, const size_t &index);
|
||||||
|
|
||||||
int InsertPostTranspose(kernel::SubGraphKernel *subgraph, kernel::KernelExec *kernel,
|
int InsertPostTranspose(kernel::SubGraphKernel *subgraph, kernel::KernelExec *kernel,
|
||||||
std::vector<Tensor *> *all_tensors, const TransInfoPair &trans_info, const int &index);
|
std::vector<Tensor *> *all_tensors, const TransInfoPair &trans_info, const size_t &index);
|
||||||
|
|
||||||
int GetTransposeInfo(const kernel::KernelExec *kernel, TransInfoPair *trans_info);
|
int GetTransposeInfo(const kernel::KernelExec *kernel, TransInfoPair *trans_info);
|
||||||
} // namespace mindspore::lite::pass
|
} // namespace mindspore::lite::pass
|
||||||
|
|
|
@ -21,7 +21,7 @@ RuntimeOptimizer::~RuntimeOptimizer() { passes_.clear(); }
|
||||||
|
|
||||||
int RuntimeOptimizer::AddPass(RuntimePassPtr pass) {
|
int RuntimeOptimizer::AddPass(RuntimePassPtr pass) {
|
||||||
CHECK_NULL_RETURN(pass);
|
CHECK_NULL_RETURN(pass);
|
||||||
this->passes_.emplace_back(pass);
|
(void)this->passes_.emplace_back(pass);
|
||||||
return RET_OK;
|
return RET_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -36,7 +36,7 @@ namespace mindspore::lite::pass {
|
||||||
|
|
||||||
class ToNCHWFormat : public RuntimePass {
|
class ToNCHWFormat : public RuntimePass {
|
||||||
public:
|
public:
|
||||||
ToNCHWFormat(Format src_format, Format dst_format, std::set<schema::PrimitiveType> to_trans_kernels)
|
ToNCHWFormat(const Format &src_format, const Format &dst_format, std::set<schema::PrimitiveType> to_trans_kernels)
|
||||||
: src_format_(src_format), dst_format_(dst_format), to_trans_kernels_(to_trans_kernels) {}
|
: src_format_(src_format), dst_format_(dst_format), to_trans_kernels_(to_trans_kernels) {}
|
||||||
~ToNCHWFormat() override = default;
|
~ToNCHWFormat() override = default;
|
||||||
int Run(kernel::SubGraphKernel *subgraph, std::vector<Tensor *> *tensors) override;
|
int Run(kernel::SubGraphKernel *subgraph, std::vector<Tensor *> *tensors) override;
|
||||||
|
|
|
@ -56,7 +56,7 @@ size_t TransposeStrategy::GetTransCount(const std::vector<kernel::KernelExec *>
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool CheckInTensorsShape(kernel::KernelExec *kernel, const Format &runtime_format) {
|
bool CheckInTensorsShape(const kernel::KernelExec *kernel, const Format &runtime_format) {
|
||||||
// If teh fusion is valid, kernel will be executed in runtime_format.
|
// If teh fusion is valid, kernel will be executed in runtime_format.
|
||||||
// Only check arithmetic (two input) kernel input tensors.
|
// Only check arithmetic (two input) kernel input tensors.
|
||||||
// If broadcast for various formats is supported, this function can be deleted.
|
// If broadcast for various formats is supported, this function can be deleted.
|
||||||
|
@ -87,7 +87,8 @@ bool CheckInTensorsShape(kernel::KernelExec *kernel, const Format &runtime_forma
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool TransposeStrategy::CheckFusion(kernel::KernelExec *kernel, TransInfoPair *pre_trans, TransInfoPair *post_trans) {
|
bool TransposeStrategy::CheckFusion(const kernel::KernelExec *kernel, TransInfoPair *pre_trans,
|
||||||
|
TransInfoPair *post_trans) {
|
||||||
if (dynamic_format_kernel_lists.find(kernel->type()) == dynamic_format_kernel_lists.end()) {
|
if (dynamic_format_kernel_lists.find(kernel->type()) == dynamic_format_kernel_lists.end()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -155,24 +156,24 @@ int TransFormAxis(int axis, const TransInfoPair &trans) {
|
||||||
return axis;
|
return axis;
|
||||||
}
|
}
|
||||||
|
|
||||||
int HandleArgMinMaxKernel(kernel::KernelExec *kernel, const TransInfoPair &trans) {
|
int HandleArgMinMaxKernel(const kernel::KernelExec *kernel, const TransInfoPair &trans) {
|
||||||
auto arg_min_max_param = reinterpret_cast<ArgMinMaxParameter *>(kernel->op_parameter());
|
auto arg_min_max_param = reinterpret_cast<ArgMinMaxParameter *>(kernel->op_parameter());
|
||||||
CHECK_NULL_RETURN(arg_min_max_param);
|
CHECK_NULL_RETURN(arg_min_max_param);
|
||||||
arg_min_max_param->axis_ = TransFormAxis(arg_min_max_param->axis_, trans);
|
arg_min_max_param->axis_ = TransFormAxis(arg_min_max_param->axis_, trans);
|
||||||
return RET_OK;
|
return RET_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
int HandleConcatKernel(kernel::KernelExec *kernel, const TransInfoPair &trans) {
|
int HandleConcatKernel(const kernel::KernelExec *kernel, const TransInfoPair &trans) {
|
||||||
auto concat_param = reinterpret_cast<ConcatParameter *>(kernel->op_parameter());
|
auto concat_param = reinterpret_cast<ConcatParameter *>(kernel->op_parameter());
|
||||||
CHECK_NULL_RETURN(concat_param);
|
CHECK_NULL_RETURN(concat_param);
|
||||||
concat_param->axis_ = TransFormAxis(concat_param->axis_, trans);
|
concat_param->axis_ = TransFormAxis(concat_param->axis_, trans);
|
||||||
return RET_OK;
|
return RET_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
int HandleCropKernel(kernel::KernelExec *kernel, const TransInfoPair &trans) {
|
int HandleCropKernel(const kernel::KernelExec *kernel, const TransInfoPair &trans) {
|
||||||
auto crop_param = reinterpret_cast<CropParameter *>(kernel->op_parameter());
|
auto crop_param = reinterpret_cast<CropParameter *>(kernel->op_parameter());
|
||||||
CHECK_NULL_RETURN(crop_param);
|
CHECK_NULL_RETURN(crop_param);
|
||||||
crop_param->axis_ = TransFormAxis(crop_param->axis_, trans);
|
crop_param->axis_ = TransFormAxis(static_cast<int>(crop_param->axis_), trans);
|
||||||
return RET_OK;
|
return RET_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -91,7 +91,7 @@ class TransposeStrategy {
|
||||||
~TransposeStrategy() = default;
|
~TransposeStrategy() = default;
|
||||||
|
|
||||||
size_t GetTransCount(const std::vector<kernel::KernelExec *> &kernels, TransInfoPair *trans_info);
|
size_t GetTransCount(const std::vector<kernel::KernelExec *> &kernels, TransInfoPair *trans_info);
|
||||||
bool CheckFusion(kernel::KernelExec *kernel, TransInfoPair *pre_trans, TransInfoPair *post_trans);
|
bool CheckFusion(const kernel::KernelExec *kernel, TransInfoPair *pre_trans, TransInfoPair *post_trans);
|
||||||
int ChangeKernelAxis(kernel::KernelExec *kernel, const TransInfoPair &post_trans);
|
int ChangeKernelAxis(kernel::KernelExec *kernel, const TransInfoPair &post_trans);
|
||||||
};
|
};
|
||||||
} // namespace mindspore::lite::pass
|
} // namespace mindspore::lite::pass
|
||||||
|
|
|
@ -190,7 +190,7 @@ int SubGraphKernel::TopologicalSortNodes() {
|
||||||
|
|
||||||
while (!kernel_queue.empty()) {
|
while (!kernel_queue.empty()) {
|
||||||
auto cur_kernel = kernel_queue.front();
|
auto cur_kernel = kernel_queue.front();
|
||||||
nodes_.emplace_back(cur_kernel);
|
(void)nodes_.emplace_back(cur_kernel);
|
||||||
kernel_queue.pop();
|
kernel_queue.pop();
|
||||||
CHECK_NULL_RETURN(cur_kernel);
|
CHECK_NULL_RETURN(cur_kernel);
|
||||||
auto next_kernels = cur_kernel->out_kernels();
|
auto next_kernels = cur_kernel->out_kernels();
|
||||||
|
@ -218,7 +218,7 @@ int SubGraphKernel::TopologicalSortNodes() {
|
||||||
return RET_OK;
|
return RET_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
void SubGraphKernel::InsertInEdge(KernelExec *kernel, KernelExec *replace_kernel, const int &tensor_index) {
|
void SubGraphKernel::InsertInEdge(KernelExec *kernel, KernelExec *replace_kernel, const size_t &tensor_index) {
|
||||||
// replace_kernel is a kernel with ont input tensor and output tensor
|
// replace_kernel is a kernel with ont input tensor and output tensor
|
||||||
auto in_kernel = KernelExecUtil::FindInKernelForInTensor(kernel, kernel->in_tensors().at(tensor_index));
|
auto in_kernel = KernelExecUtil::FindInKernelForInTensor(kernel, kernel->in_tensors().at(tensor_index));
|
||||||
if (in_kernel != nullptr) {
|
if (in_kernel != nullptr) {
|
||||||
|
@ -234,7 +234,7 @@ void SubGraphKernel::InsertInEdge(KernelExec *kernel, KernelExec *replace_kernel
|
||||||
nodes_.push_back(replace_kernel);
|
nodes_.push_back(replace_kernel);
|
||||||
}
|
}
|
||||||
|
|
||||||
void SubGraphKernel::InsertOutEdge(KernelExec *kernel, KernelExec *replace_kernel, const int &tensor_index) {
|
void SubGraphKernel::InsertOutEdge(KernelExec *kernel, KernelExec *replace_kernel, const size_t &tensor_index) {
|
||||||
// replace_kernel is a kernel with ont input tensor and output tensor
|
// replace_kernel is a kernel with ont input tensor and output tensor
|
||||||
auto out_kernels = KernelExecUtil::FindOutKernelsForOutTensor(kernel, kernel->out_tensors().at(tensor_index));
|
auto out_kernels = KernelExecUtil::FindOutKernelsForOutTensor(kernel, kernel->out_tensors().at(tensor_index));
|
||||||
for (const auto &post_kernel : out_kernels) {
|
for (const auto &post_kernel : out_kernels) {
|
||||||
|
@ -253,8 +253,8 @@ void SubGraphKernel::InsertOutEdge(KernelExec *kernel, KernelExec *replace_kerne
|
||||||
// in_kernel -> in_post_kernel -> out_pre_kernel -> out_kernels.
|
// in_kernel -> in_post_kernel -> out_pre_kernel -> out_kernels.
|
||||||
// remove in_post_kernel and out_pre_kernel, link in_kernel and out_kernels.
|
// remove in_post_kernel and out_pre_kernel, link in_kernel and out_kernels.
|
||||||
// in_post_kernel and out_pre_kernel can be the same kernel sometimes.
|
// in_post_kernel and out_pre_kernel can be the same kernel sometimes.
|
||||||
int SubGraphKernel::UpdateInOutKernels(KernelExec *in_kernel, std::vector<KernelExec *> out_kernels,
|
void SubGraphKernel::UpdateInOutKernels(KernelExec *in_kernel, std::vector<KernelExec *> out_kernels,
|
||||||
KernelExec *in_post_kernel, KernelExec *out_pre_kernel) {
|
KernelExec *in_post_kernel, KernelExec *out_pre_kernel) {
|
||||||
for (const auto &out_kernel : out_kernels) {
|
for (const auto &out_kernel : out_kernels) {
|
||||||
out_kernel->RemoveInKernel(out_pre_kernel);
|
out_kernel->RemoveInKernel(out_pre_kernel);
|
||||||
out_pre_kernel->RemoveOutKernel(out_kernel);
|
out_pre_kernel->RemoveOutKernel(out_kernel);
|
||||||
|
@ -280,7 +280,7 @@ int SubGraphKernel::UpdateInOutKernels(KernelExec *in_kernel, std::vector<Kernel
|
||||||
in_nodes_.push_back(out_kernel);
|
in_nodes_.push_back(out_kernel);
|
||||||
}
|
}
|
||||||
if (in_post_kernel->out_kernels().empty() && !lite::IsContain(out_nodes_, in_post_kernel)) {
|
if (in_post_kernel->out_kernels().empty() && !lite::IsContain(out_nodes_, in_post_kernel)) {
|
||||||
lite::VectorErase(&in_nodes_, in_post_kernel);
|
(void)lite::VectorErase(&in_nodes_, in_post_kernel);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -288,10 +288,9 @@ int SubGraphKernel::UpdateInOutKernels(KernelExec *in_kernel, std::vector<Kernel
|
||||||
if (lite::IsContain(out_nodes_, out_pre_kernel) && in_kernel != nullptr) {
|
if (lite::IsContain(out_nodes_, out_pre_kernel) && in_kernel != nullptr) {
|
||||||
out_nodes_.push_back(in_kernel);
|
out_nodes_.push_back(in_kernel);
|
||||||
if (out_pre_kernel->in_kernels().empty() && !lite::IsContain(in_nodes_, out_pre_kernel)) {
|
if (out_pre_kernel->in_kernels().empty() && !lite::IsContain(in_nodes_, out_pre_kernel)) {
|
||||||
lite::VectorErase(&out_nodes_, out_pre_kernel);
|
(void)lite::VectorErase(&out_nodes_, out_pre_kernel);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return RET_OK;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update tensor according to the subgraph.
|
// Update tensor according to the subgraph.
|
||||||
|
@ -340,14 +339,10 @@ int SubGraphKernel::DeleteSingleWayNode(KernelExec *kernel, bool keep_input) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// update kernel link
|
// update kernel link
|
||||||
auto ret = UpdateInOutKernels(in_kernel, out_kernels, kernel, kernel);
|
UpdateInOutKernels(in_kernel, out_kernels, kernel, kernel);
|
||||||
if (ret != RET_OK) {
|
|
||||||
MS_LOG(ERROR) << "Update kernel link failed when removing kernel " << kernel->name();
|
|
||||||
return RET_ERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
// update tensor link
|
// update tensor link
|
||||||
ret = UpdateInOutTensors(in_kernel, out_kernels, in_tensor, out_tensor, keep_input);
|
auto ret = UpdateInOutTensors(in_kernel, out_kernels, in_tensor, out_tensor, keep_input);
|
||||||
if (ret != RET_OK) {
|
if (ret != RET_OK) {
|
||||||
MS_LOG(ERROR) << "Update tensor failed when removing kernel " << kernel->name();
|
MS_LOG(ERROR) << "Update tensor failed when removing kernel " << kernel->name();
|
||||||
return RET_ERROR;
|
return RET_ERROR;
|
||||||
|
|
|
@ -112,20 +112,20 @@ class SubGraphKernel : public KernelExec {
|
||||||
|
|
||||||
std::vector<KernelExec *> out_nodes() { return this->out_nodes_; }
|
std::vector<KernelExec *> out_nodes() { return this->out_nodes_; }
|
||||||
|
|
||||||
void SetInNodes(std::vector<KernelExec *> in_nodes) { in_nodes_ = in_nodes; }
|
void SetInNodes(const std::vector<KernelExec *> &in_nodes) { in_nodes_ = in_nodes; }
|
||||||
|
|
||||||
void SetOutNodes(std::vector<KernelExec *> out_nodes) { out_nodes_ = out_nodes; }
|
void SetOutNodes(const std::vector<KernelExec *> &out_nodes) { out_nodes_ = out_nodes; }
|
||||||
|
|
||||||
void SetSchemaVersion(int schema_version) { schema_version_ = schema_version; }
|
void SetSchemaVersion(int schema_version) { schema_version_ = schema_version; }
|
||||||
|
|
||||||
int TopologicalSortNodes();
|
int TopologicalSortNodes();
|
||||||
|
|
||||||
void InsertInEdge(KernelExec *kernel, KernelExec *replace_kernel, const int &tensor_index);
|
void InsertInEdge(KernelExec *kernel, KernelExec *replace_kernel, const size_t &tensor_index);
|
||||||
|
|
||||||
void InsertOutEdge(KernelExec *kernel, KernelExec *replace_kernel, const int &tensor_index);
|
void InsertOutEdge(KernelExec *kernel, KernelExec *replace_kernel, const size_t &tensor_index);
|
||||||
|
|
||||||
int UpdateInOutKernels(KernelExec *in_kernel, std::vector<KernelExec *> out_kernels, KernelExec *in_post_kernel,
|
void UpdateInOutKernels(KernelExec *in_kernel, std::vector<KernelExec *> out_kernels, KernelExec *in_post_kernel,
|
||||||
KernelExec *out_pre_kernel);
|
KernelExec *out_pre_kernel);
|
||||||
|
|
||||||
int UpdateInOutTensors(KernelExec *in_kernel, std::vector<KernelExec *> out_kernels, lite::Tensor *in_tensor,
|
int UpdateInOutTensors(KernelExec *in_kernel, std::vector<KernelExec *> out_kernels, lite::Tensor *in_tensor,
|
||||||
lite::Tensor *out_tensor, bool keep_input);
|
lite::Tensor *out_tensor, bool keep_input);
|
||||||
|
|
|
@ -38,7 +38,7 @@ constexpr size_t kFlatbuffersBuilderInitSize = 1024;
|
||||||
void ChangeMod(const std::string &file_path) {
|
void ChangeMod(const std::string &file_path) {
|
||||||
#ifndef _MSC_VER
|
#ifndef _MSC_VER
|
||||||
if (access(file_path.c_str(), F_OK) == 0) {
|
if (access(file_path.c_str(), F_OK) == 0) {
|
||||||
chmod(file_path.c_str(), S_IWUSR | S_IRUSR);
|
(void)chmod(file_path.c_str(), S_IWUSR | S_IRUSR);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
|
@ -246,7 +246,7 @@ int FetchFromSequenceValue(const ValueNodePtr &value_node, DataInfo *data_info)
|
||||||
}
|
}
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
int FetchFromDefaultParam(const ParameterPtr &param_node, const converter::FmkType &fmk_type, DataInfo *data_info,
|
int FetchFromDefaultParam(const ParameterPtr &param_node, const converter::FmkType &, DataInfo *data_info,
|
||||||
bool copy_data) {
|
bool copy_data) {
|
||||||
MS_ASSERT(param_node != nullptr && data_info != nullptr);
|
MS_ASSERT(param_node != nullptr && data_info != nullptr);
|
||||||
ShapeVector shape_vector;
|
ShapeVector shape_vector;
|
||||||
|
|
|
@ -72,8 +72,9 @@ ParameterPtr CreateNewParamter(const FuncGraphPtr &func_graph, Tensor *tensor) {
|
||||||
}
|
}
|
||||||
return parameter;
|
return parameter;
|
||||||
}
|
}
|
||||||
|
|
||||||
kernel::KernelExec *GetKernelExec(std::vector<Tensor *> inputs, std::vector<Tensor *> *outputs, const CNodePtr &cnode,
|
kernel::KernelExec *GetKernelExec(std::vector<Tensor *> inputs, std::vector<Tensor *> *outputs, const CNodePtr &cnode,
|
||||||
lite::InnerContext *context, mindspore::Context *ms_context) {
|
const lite::InnerContext *context, const mindspore::Context *ms_context) {
|
||||||
MS_ASSERT(outputs != nullptr && cnode != nullptr && context != nullptr && ms_context != nullptr);
|
MS_ASSERT(outputs != nullptr && cnode != nullptr && context != nullptr && ms_context != nullptr);
|
||||||
OpParameter *parameter = nullptr;
|
OpParameter *parameter = nullptr;
|
||||||
auto ret = lite::FetchOpParameterFromNode(cnode->input(0), &parameter);
|
auto ret = lite::FetchOpParameterFromNode(cnode->input(0), &parameter);
|
||||||
|
@ -92,8 +93,7 @@ kernel::KernelExec *GetKernelExec(std::vector<Tensor *> inputs, std::vector<Tens
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
auto data_type = inputs.front()->data_type();
|
auto data_type = inputs.front()->data_type();
|
||||||
kernel::KernelKey desc{kernel::KERNEL_ARCH::kCPU, data_type, NHWC,
|
kernel::KernelKey desc{kernel::KERNEL_ARCH::kCPU, data_type, NHWC, parameter->type_};
|
||||||
static_cast<schema::PrimitiveType>(parameter->type_)};
|
|
||||||
kernel::KernelExec *kernel_exec = nullptr;
|
kernel::KernelExec *kernel_exec = nullptr;
|
||||||
ret = lite::KernelRegistry::GetInstance()->GetKernelExec(inputs, *outputs, context, ms_context, desc, parameter,
|
ret = lite::KernelRegistry::GetInstance()->GetKernelExec(inputs, *outputs, context, ms_context, desc, parameter,
|
||||||
&kernel_exec);
|
&kernel_exec);
|
||||||
|
|
|
@ -588,7 +588,7 @@ int DecreaseTransposeAlgo::SetSubGraphInput(const CNodePtr &cnode, const FuncGra
|
||||||
auto last_underline = node_name.find_last_of("_");
|
auto last_underline = node_name.find_last_of("_");
|
||||||
node_name = node_name.substr(0, last_underline);
|
node_name = node_name.substr(0, last_underline);
|
||||||
last_underline = node_name.find_last_of("_");
|
last_underline = node_name.find_last_of("_");
|
||||||
auto index = 0;
|
size_t index = 0;
|
||||||
try {
|
try {
|
||||||
index = std::stoi(node_name.substr(last_underline + 1)) + static_cast<int>(kInputSizeThree);
|
index = std::stoi(node_name.substr(last_underline + 1)) + static_cast<int>(kInputSizeThree);
|
||||||
} catch (const std::exception &e) {
|
} catch (const std::exception &e) {
|
||||||
|
|
Loading…
Reference in New Issue