forked from mindspore-Ecosystem/mindspore
!4867 Clean cmake building warnings.
Merge pull request !4867 from wangshaocong/lite_clean
This commit is contained in:
commit
6782325bfd
|
@ -64,14 +64,25 @@ set(CMAKE_VERBOSE_MAKEFILE on)
|
|||
add_compile_definitions(USE_ANDROID_LOG)
|
||||
add_compile_definitions(NO_DLIB)
|
||||
add_compile_options(-fPIC)
|
||||
if("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
|
||||
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden")
|
||||
string(REPLACE "-g" " " CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
|
||||
endif()
|
||||
if (NOT PLATFORM_ARM64 AND NOT PLATFORM_ARM32)
|
||||
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DDebug -g")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDebug -g")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=default")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=default")
|
||||
else ()
|
||||
## enable for binscope for release
|
||||
set(CMAKE_C_FLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -Wall -Werror -fstack-protector-strong -Wno-attributes -Wno-deprecated-declarations ${CMAKE_C_FLAGS}")
|
||||
set(CMAKE_CXX_FLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -Wall -Werror -fstack-protector-strong -Wno-attributes -Wno-deprecated-declarations ${CMAKE_CXX_FLAGS}")
|
||||
set(CMAKE_SHARED_LINKER_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack ${CMAKE_SHARED_LINKER_FLAGS}")
|
||||
set(CMAKE_EXE_LINKER_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack ${CMAKE_EXE_LINKER_FLAGS}")
|
||||
string(REPLACE " -g " " " CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (BUILD_DEVICE)
|
||||
add_compile_definitions(BUILD_DEVICE)
|
||||
endif()
|
||||
endif ()
|
||||
if (SUPPORT_TRAIN)
|
||||
add_compile_definitions(SUPPORT_TRAIN)
|
||||
endif()
|
||||
|
@ -86,17 +97,17 @@ if (SUPPORT_GPU)
|
|||
add_definitions(-DMS_OPENCL_PROFILE=false)
|
||||
add_definitions(-DCL_HPP_TARGET_OPENCL_VERSION=200)
|
||||
add_compile_definitions(SUPPORT_GPU)
|
||||
if(OFFLINE_COMPILE)
|
||||
if (OFFLINE_COMPILE)
|
||||
add_compile_definitions(PROGRAM_WITH_IL)
|
||||
endif()
|
||||
endif ()
|
||||
include_directories(${TOP_DIR}/third_party/OpenCL-Headers)
|
||||
include_directories(${TOP_DIR}/third_party/OpenCL-CLHPP/include)
|
||||
endif()
|
||||
endif ()
|
||||
|
||||
if (WIN32)
|
||||
add_compile_definitions(LITE_EXPORTS)
|
||||
add_compile_definitions(BUILDING_DLL)
|
||||
endif()
|
||||
endif ()
|
||||
|
||||
set(ANF_SRC
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../core/ir/meta_tensor.cc
|
||||
|
@ -110,26 +121,26 @@ if (BUILD_CONVERTER)
|
|||
MESSAGE(FATAL_ERROR "Cannot build converter in arm platform")
|
||||
endif()
|
||||
find_package(Python3 3.7 COMPONENTS Interpreter Development)
|
||||
if(Python3_FOUND)
|
||||
if (Python3_FOUND)
|
||||
set(PYTHON_INCLUDE_DIRS "${Python3_INCLUDE_DIRS}")
|
||||
set(PYTHON_LIBRARIES "${Python3_LIBRARIES}")
|
||||
if (WIN32)
|
||||
if (Python3_DIR)
|
||||
message("Python3_DIR set already: " ${Python3_DIR})
|
||||
else()
|
||||
else ()
|
||||
string(LENGTH ${PYTHON_LIBRARIES} PYTHON_LIBRARIES_LEN)
|
||||
string(LENGTH "libpythonxx.a" Python3_NAME_LEN)
|
||||
math(EXPR Python3_DIR_LEN ${PYTHON_LIBRARIES_LEN}-${Python3_NAME_LEN})
|
||||
string(SUBSTRING ${Python3_LIBRARIES} 0 ${Python3_DIR_LEN} Python3_DIR)
|
||||
message("Python3_DIR: " ${Python3_DIR})
|
||||
endif()
|
||||
endif ()
|
||||
link_directories(${Python3_DIR})
|
||||
endif()
|
||||
else()
|
||||
endif ()
|
||||
else ()
|
||||
find_python_package(py_inc py_lib)
|
||||
set(PYTHON_INCLUDE_DIRS "${py_inc}")
|
||||
set(PYTHON_LIBRARIES "${py_lib}")
|
||||
endif()
|
||||
endif ()
|
||||
include_directories(${PYTHON_INCLUDE_DIRS})
|
||||
include(${TOP_DIR}/cmake/external_libs/json.cmake)
|
||||
include(${TOP_DIR}/cmake/external_libs/pybind11.cmake)
|
||||
|
@ -137,27 +148,27 @@ if (BUILD_CONVERTER)
|
|||
include_directories(${TOP_DIR}/third_party/protobuf/build/include)
|
||||
link_directories(${TOP_DIR}/third_party/protobuf/build/lib)
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/converter)
|
||||
endif()
|
||||
endif ()
|
||||
|
||||
if (BUILD_DEVICE)
|
||||
if (PLATFORM_ARM32 OR PLATFORM_ARM64)
|
||||
if (NOT DEFINED ENV{ANDROID_NDK})
|
||||
message(FATAL_ERROR "env ANDROID_NDK should be setted for ARM compile")
|
||||
endif()
|
||||
endif ()
|
||||
add_compile_definitions(ENABLE_ARM)
|
||||
endif()
|
||||
endif ()
|
||||
if (PLATFORM_ARM32)
|
||||
add_definitions(-mfloat-abi=softfp -mfpu=neon)
|
||||
add_compile_definitions(ENABLE_ARM32)
|
||||
endif()
|
||||
endif ()
|
||||
if (PLATFORM_ARM64)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8.2-a+dotprod+fp16")
|
||||
add_compile_definitions(ENABLE_ARM64)
|
||||
if (ENABLE_FP16)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8.2-a+dotprod+fp16")
|
||||
endif ()
|
||||
endif()
|
||||
endif()
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (BUILD_MINDDATA)
|
||||
# opencv
|
||||
|
@ -167,7 +178,7 @@ if (BUILD_MINDDATA)
|
|||
# json
|
||||
if (NOT BUILD_CONVERTER)
|
||||
include(${TOP_DIR}/cmake/external_libs/json.cmake)
|
||||
endif()
|
||||
endif ()
|
||||
# eigen
|
||||
include_directories(${TOP_DIR}/third_party/eigen/)
|
||||
# jpeg-turbo
|
||||
|
@ -183,7 +194,7 @@ if (BUILD_MINDDATA)
|
|||
|
||||
add_compile_definitions(ENABLE_ANDROID)
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/minddata)
|
||||
endif()
|
||||
endif ()
|
||||
|
||||
if (BUILD_DEVICE)
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src)
|
||||
|
@ -191,7 +202,7 @@ if (BUILD_DEVICE)
|
|||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/benchmark)
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/test)
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/time_profile)
|
||||
endif()
|
||||
endif()
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
include(${TOP_DIR}/cmake/package_lite.cmake)
|
|
@ -37,11 +37,6 @@ static constexpr int kNHWCDimNumber = 4;
|
|||
|
||||
static constexpr int TENSOR_MAX_REFCOUNT = 999;
|
||||
|
||||
static const char *DELIM_COLON = ":";
|
||||
static const char *DELIM_COMMA = ",";
|
||||
static const char *DELIM_SLASH = "/";
|
||||
static const char *DELIM_DOUBLE_BACKSLASH = "\\";
|
||||
|
||||
// quantization relative
|
||||
static const char QUANTIZED_UINT8[] = "QUANTIZED_UINT8";
|
||||
static const char QUANTIZED_INT8[] = "QUANTIZED_INT8";
|
||||
|
|
|
@ -103,7 +103,7 @@ int WriteToBin(const std::string &file_path, void *data, size_t size) {
|
|||
|
||||
int CompareOutputData(float *output_data, float *correct_data, int data_size) {
|
||||
float error = 0;
|
||||
for (size_t i = 0; i < data_size; i++) {
|
||||
for (int i = 0; i < data_size; i++) {
|
||||
float abs = fabs(output_data[i] - correct_data[i]);
|
||||
if (abs > 0.00001) {
|
||||
error += abs;
|
||||
|
|
|
@ -237,7 +237,7 @@ std::string Tensor::ToString() const {
|
|||
if (data == nullptr) {
|
||||
return "Data of tensor is nullptr";
|
||||
} else {
|
||||
for (size_t i = 0; i < 40 && i < this->ElementsNum(); i++) {
|
||||
for (int i = 0; i < 40 && i < this->ElementsNum(); i++) {
|
||||
oss << " " << data[i];
|
||||
}
|
||||
}
|
||||
|
@ -247,7 +247,7 @@ std::string Tensor::ToString() const {
|
|||
if (data == nullptr) {
|
||||
return "Data of tensor is nullptr";
|
||||
} else {
|
||||
for (size_t i = 0; i < 40 && i < this->ElementsNum(); i++) {
|
||||
for (int i = 0; i < 40 && i < this->ElementsNum(); i++) {
|
||||
oss << " " << data[i];
|
||||
}
|
||||
}
|
||||
|
|
|
@ -187,8 +187,8 @@ class Tensor : public mindspore::tensor::MetaTensor {
|
|||
protected:
|
||||
void *data_ = nullptr;
|
||||
void *device_data_ = nullptr;
|
||||
schema::NodeType tensorType;
|
||||
schema::Format format_;
|
||||
schema::NodeType tensorType;
|
||||
size_t refCount = 0;
|
||||
std::vector<tensor::QuantArg> quant_params_;
|
||||
mindspore::lite::Allocator *allocator_ = nullptr;
|
||||
|
|
|
@ -154,11 +154,11 @@ class LiteKernel {
|
|||
KernelKey desc_;
|
||||
std::string name_;
|
||||
OpParameter *op_parameter_ = nullptr;
|
||||
const mindspore::lite::PrimitiveC *primitive_ = nullptr;
|
||||
const lite::Context *context_ = nullptr;
|
||||
// tensor will free in ~lite_session()
|
||||
std::vector<lite::tensor::Tensor *> in_tensors_;
|
||||
std::vector<lite::tensor::Tensor *> out_tensors_;
|
||||
const mindspore::lite::PrimitiveC *primitive_ = nullptr;
|
||||
const lite::Context *context_ = nullptr;
|
||||
std::vector<LiteKernel *> in_kernels_;
|
||||
std::vector<LiteKernel *> out_kernels_;
|
||||
bool train_mode_ = false;
|
||||
|
|
|
@ -66,7 +66,7 @@ int LiteSession::ConvertTensors(const lite::Model *model) {
|
|||
}
|
||||
auto quant_params = srcTensor->quantParams();
|
||||
if (quant_params != nullptr) {
|
||||
for (int j = 0; j < quant_params->size(); j++) {
|
||||
for (size_t j = 0; j < quant_params->size(); j++) {
|
||||
tensor::QuantArg quant_arg{};
|
||||
quant_arg.scale = quant_params->Get(j)->scale();
|
||||
quant_arg.zeroPoint = quant_params->Get(j)->zeroPoint();
|
||||
|
@ -93,9 +93,7 @@ void LiteSession::InitGraphInputTensors(const lite::Model *model) {
|
|||
}
|
||||
|
||||
void LiteSession::InitGraphInputMSTensors(const lite::Model *model) {
|
||||
auto meta_graph = model->GetMetaGraph();
|
||||
MS_ASSERT(this->input_vec_.empty());
|
||||
MS_ASSERT(meta_graph != nullptr);
|
||||
for (auto &input_tensor : this->inputs_) {
|
||||
MS_ASSERT(input_tensor != nullptr);
|
||||
this->input_vec_.emplace_back(new lite::tensor::LiteTensor(input_tensor));
|
||||
|
|
|
@ -52,10 +52,9 @@ int Fill::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tensor::
|
|||
}
|
||||
|
||||
std::vector<int> output_shape;
|
||||
for (int i = 0; i < GetDims().size(); i++) {
|
||||
for (size_t i = 0; i < GetDims().size(); i++) {
|
||||
output_shape.push_back(GetDims()[i]);
|
||||
}
|
||||
// (void)output_shape.insert(output_shape.begin(), GetDims().begin(), GetDims().end());
|
||||
output->set_shape(output_shape);
|
||||
return RET_OK;
|
||||
}
|
||||
|
|
|
@ -64,7 +64,7 @@ int FullConnection::InferShape(std::vector<lite::tensor::Tensor *> inputs_,
|
|||
}
|
||||
int new_k = 1;
|
||||
if (GetUseAxis()) {
|
||||
for (int i = GetAxis(); i < input0->shape().size(); ++i) {
|
||||
for (size_t i = GetAxis(); i < input0->shape().size(); ++i) {
|
||||
new_k *= input0->shape()[i];
|
||||
}
|
||||
if (new_k != input1->shape()[1]) {
|
||||
|
@ -86,7 +86,7 @@ int FullConnection::InferShape(std::vector<lite::tensor::Tensor *> inputs_,
|
|||
out_shape[GetAxis()] = input1->shape()[0];
|
||||
} else {
|
||||
int total = 1;
|
||||
for (int i = 0; i < input0->shape().size(); ++i) {
|
||||
for (size_t i = 0; i < input0->shape().size(); ++i) {
|
||||
total *= input0->shape()[i];
|
||||
}
|
||||
out_shape.resize(2);
|
||||
|
|
|
@ -43,7 +43,6 @@ void Pad::SetPaddingMode(int padding_mode) {}
|
|||
void Pad::SetConstantValue(float constant_value) {}
|
||||
#endif
|
||||
namespace {
|
||||
const size_t kPaddingsSize = 8;
|
||||
const size_t kInputRank = 4;
|
||||
} // namespace
|
||||
int Pad::InferShape(std::vector<tensor::Tensor *> inputs, std::vector<tensor::Tensor *> outputs) {
|
||||
|
|
|
@ -145,10 +145,9 @@ int Reshape::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tenso
|
|||
}
|
||||
}
|
||||
} else if (inputs_.size() == kSingleNum) {
|
||||
for (int i = 0; i < GetShape().size(); ++i) {
|
||||
for (size_t i = 0; i < GetShape().size(); ++i) {
|
||||
out_shape.push_back(GetShape()[i]);
|
||||
}
|
||||
// std::copy(GetShape().begin(), GetShape().end(), std::back_inserter(out_shape));
|
||||
} else {
|
||||
MS_LOG(ERROR) << "inputs tensor size invalid.";
|
||||
return RET_INFER_ERR;
|
||||
|
|
|
@ -75,7 +75,7 @@ int Split::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tensor:
|
|||
int split_dim = GetSplitDim();
|
||||
std::vector<int> input_shape = input->shape();
|
||||
std::vector<int> size_split;
|
||||
for (int i = 0; i < GetSizeSplits().size(); ++i) {
|
||||
for (size_t i = 0; i < GetSizeSplits().size(); ++i) {
|
||||
size_split.push_back(GetSizeSplits()[i]);
|
||||
}
|
||||
for (int i = 0; i < number_split; ++i) {
|
||||
|
|
|
@ -60,10 +60,9 @@ int Tile::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tensor::
|
|||
MS_ASSERT(tile_prim != nullptr);
|
||||
std::vector<int> out_shape;
|
||||
std::vector<int> multiples;
|
||||
for (int i = 0; i < GetMultiples().size(); ++i) {
|
||||
for (size_t i = 0; i < GetMultiples().size(); ++i) {
|
||||
multiples.push_back(GetMultiples()[i]);
|
||||
}
|
||||
// std::copy(GetMultiples().begin(), GetMultiples().end(), std::back_inserter(multiples));
|
||||
for (size_t i = 0; i < input->shape().size(); ++i) {
|
||||
int tmp = input->shape()[i] * multiples[i];
|
||||
out_shape.push_back(tmp);
|
||||
|
|
|
@ -59,10 +59,9 @@ int Transpose::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<ten
|
|||
return RET_ERROR;
|
||||
}
|
||||
std::vector<int> perm;
|
||||
for (int i = 0; i < GetPerm().size(); i++) {
|
||||
for (size_t i = 0; i < GetPerm().size(); i++) {
|
||||
perm.push_back(GetPerm()[i]);
|
||||
}
|
||||
// perm.insert(perm.begin(), GetPerm().begin(), GetPerm().end());
|
||||
std::vector<int> in_shape = input->shape();
|
||||
std::vector<int> out_shape;
|
||||
out_shape.resize(perm.size());
|
||||
|
|
|
@ -246,7 +246,7 @@ OpParameter *PopulatePreluParameter(const mindspore::lite::PrimitiveC *primitive
|
|||
}
|
||||
prelu_param->op_parameter_.type_ = primitive->Type();
|
||||
auto temp = param->GetSlope();
|
||||
for (int i = 0; i < temp.size(); i++) {
|
||||
for (size_t i = 0; i < temp.size(); i++) {
|
||||
prelu_param->slope_[i] = temp[i];
|
||||
}
|
||||
return reinterpret_cast<OpParameter *>(prelu_param);
|
||||
|
@ -404,7 +404,6 @@ OpParameter *PopulateConvDwParameter(const mindspore::lite::PrimitiveC *primitiv
|
|||
conv_param->stride_h_ = conv_primitive->GetStrideH();
|
||||
conv_param->stride_w_ = conv_primitive->GetStrideW();
|
||||
|
||||
auto pad_mode = conv_primitive->GetPadMode();
|
||||
auto convdw_lite_primitive = (lite::DepthwiseConv2D *)primitive;
|
||||
MS_ASSERT(nullptr != convdw_lite_primitive);
|
||||
conv_param->pad_u_ = convdw_lite_primitive->PadUp();
|
||||
|
@ -828,7 +827,7 @@ OpParameter *PopulateTileParameter(const mindspore::lite::PrimitiveC *primitive)
|
|||
auto param = dynamic_cast<const mindspore::lite::Tile *>(primitive);
|
||||
auto multiples = param->GetMultiples();
|
||||
tile_param->in_dim_ = multiples.size();
|
||||
for (size_t i = 0; i < tile_param->in_dim_; ++i) {
|
||||
for (int i = 0; i < tile_param->in_dim_; ++i) {
|
||||
tile_param->multiples_[i] = multiples[i];
|
||||
}
|
||||
return reinterpret_cast<OpParameter *>(tile_param);
|
||||
|
@ -1231,7 +1230,7 @@ OpParameter *PopulateCropParameter(const mindspore::lite::PrimitiveC *primitive)
|
|||
crop_param->op_parameter_.type_ = primitive->Type();
|
||||
crop_param->axis_ = param->GetAxis();
|
||||
crop_param->offset_size_ = param_offset.size();
|
||||
for (int i = 0; i < param_offset.size(); ++i) {
|
||||
for (size_t i = 0; i < param_offset.size(); ++i) {
|
||||
crop_param->offset_[i] = param_offset[i];
|
||||
}
|
||||
return reinterpret_cast<OpParameter *>(crop_param);
|
||||
|
|
|
@ -43,8 +43,8 @@ class CaffePreluBaseCPUKernel : public LiteKernel {
|
|||
int Run() override { return 0; }
|
||||
|
||||
protected:
|
||||
int thread_count_;
|
||||
const Context *ctx_;
|
||||
int thread_count_;
|
||||
CaffePreluParameter *prelu_param_;
|
||||
};
|
||||
} // namespace mindspore::kernel
|
||||
|
|
|
@ -43,9 +43,9 @@ class ConcatBaseCPUKernel : public LiteKernel {
|
|||
int Run() override { return 0; }
|
||||
|
||||
protected:
|
||||
int thread_count_;
|
||||
int axis_;
|
||||
const Context *ctx_;
|
||||
int thread_count_;
|
||||
ConcatParameter *concat_param_ = nullptr;
|
||||
};
|
||||
} // namespace mindspore::kernel
|
||||
|
|
|
@ -121,7 +121,7 @@ int ConvolutionBaseCPUKernel::SetIfPerChannel() {
|
|||
uint8_t per_channel = 0b0;
|
||||
if (conv_quant_arg_->input_arg_num_ != kPerTensor) {
|
||||
int in_channel = conv_param_->input_channel_;
|
||||
if (conv_quant_arg_->input_arg_num_ != in_channel) {
|
||||
if (static_cast<int>(conv_quant_arg_->input_arg_num_) != in_channel) {
|
||||
MS_LOG(ERROR) << "input per channel quant param length is not equal to input channel.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
@ -130,7 +130,7 @@ int ConvolutionBaseCPUKernel::SetIfPerChannel() {
|
|||
|
||||
if (conv_quant_arg_->filter_arg_num_ != kPerTensor) {
|
||||
int filter_num = conv_param_->output_channel_;
|
||||
if (conv_quant_arg_->filter_arg_num_ != filter_num) {
|
||||
if (static_cast<int>(conv_quant_arg_->filter_arg_num_) != filter_num) {
|
||||
MS_LOG(ERROR) << "weight per channel quant param length is not equal to filter num.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
@ -139,7 +139,7 @@ int ConvolutionBaseCPUKernel::SetIfPerChannel() {
|
|||
|
||||
if (conv_quant_arg_->output_arg_num_ != kPerTensor) {
|
||||
int out_channel = conv_param_->output_channel_;
|
||||
if (conv_quant_arg_->output_arg_num_ != out_channel) {
|
||||
if (static_cast<int>(conv_quant_arg_->output_arg_num_) != out_channel) {
|
||||
MS_LOG(ERROR) << "output per channel quant param length is not equal to output channel.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
@ -218,11 +218,6 @@ int ConvolutionBaseCPUKernel::SetInputTensorQuantParam() {
|
|||
// per channel
|
||||
MS_LOG(ERROR) << "Not Support Per Channel for input now.";
|
||||
return RET_ERROR;
|
||||
// auto input_quant_arg = input_tensor->GetQuantParams();
|
||||
// for (int i = 0; i < in_arg_num; ++i) {
|
||||
// conv_quant_arg_->input_quant_args_[i].zp_ = input_quant_arg[i].zeroPoint;
|
||||
// conv_quant_arg_->input_quant_args_[i].scale_ = input_quant_arg[i].scale;
|
||||
// }
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
@ -236,7 +231,7 @@ int ConvolutionBaseCPUKernel::SetFilterTensorQuantParam() {
|
|||
conv_quant_arg_->filter_quant_args_[0].scale_ = weight_quant_arg.scale;
|
||||
} else {
|
||||
auto weight_quant_arg = weight_tensor->GetQuantParams();
|
||||
for (int i = 0; i < weight_arg_num; ++i) {
|
||||
for (size_t i = 0; i < weight_arg_num; ++i) {
|
||||
conv_quant_arg_->filter_quant_args_[i].zp_ = weight_quant_arg[i].zeroPoint;
|
||||
conv_quant_arg_->filter_quant_args_[i].scale_ = weight_quant_arg[i].scale;
|
||||
}
|
||||
|
|
|
@ -62,11 +62,11 @@ class ConvolutionBaseCPUKernel : public LiteKernel {
|
|||
void FreeQuantParam();
|
||||
|
||||
protected:
|
||||
int thread_count_;
|
||||
int tile_num_;
|
||||
void *bias_data_ = nullptr;
|
||||
void *nhwc4_input_ = nullptr;
|
||||
const Context *ctx_;
|
||||
int thread_count_;
|
||||
ConvParameter *conv_param_;
|
||||
ConvQuantArg *conv_quant_arg_;
|
||||
LayoutConvertor convert_func_;
|
||||
|
|
|
@ -41,9 +41,9 @@ class FullconnectionBaseCPUKernel : public LiteKernel {
|
|||
|
||||
protected:
|
||||
MatMulParameter *fc_param_;
|
||||
int thread_count_;
|
||||
int thread_stride_;
|
||||
const Context *ctx_;
|
||||
int thread_count_;
|
||||
};
|
||||
} // namespace mindspore::kernel
|
||||
|
||||
|
|
|
@ -41,9 +41,9 @@ class MatmulBaseCPUKernel : public LiteKernel {
|
|||
|
||||
protected:
|
||||
MatMulParameter *params_;
|
||||
int thread_count_;
|
||||
int thread_stride_;
|
||||
const Context *ctx_;
|
||||
int thread_count_;
|
||||
};
|
||||
} // namespace mindspore::kernel
|
||||
|
||||
|
|
|
@ -43,8 +43,8 @@ class PoolingBaseCPUKernel : public LiteKernel {
|
|||
void FreeQuantParam();
|
||||
|
||||
protected:
|
||||
int thread_count_;
|
||||
const Context *ctx_;
|
||||
int thread_count_;
|
||||
PoolingParameter *pooling_param_;
|
||||
QuantArg **pooling_quant_arg_ = nullptr;
|
||||
};
|
||||
|
|
|
@ -41,8 +41,8 @@ class PriorBoxCPUKernel : public LiteKernel {
|
|||
int PriorBoxImpl(int task_id);
|
||||
|
||||
protected:
|
||||
int thread_count_;
|
||||
const Context *ctx_;
|
||||
int thread_count_;
|
||||
|
||||
private:
|
||||
std::vector<float> output_;
|
||||
|
|
|
@ -76,7 +76,7 @@ int ReduceBaseCPUKernel::CheckParameters() {
|
|||
}
|
||||
|
||||
if (num_axes_ == 0) {
|
||||
for (int i = 0; i < input_rank; i++) {
|
||||
for (size_t i = 0; i < input_rank; i++) {
|
||||
axes_[i] = i;
|
||||
}
|
||||
num_axes_ = static_cast<int>(input_rank);
|
||||
|
|
|
@ -45,7 +45,7 @@ int SliceBaseCPUKernel::ReSize() {
|
|||
param_->begin_[DIMENSION_4D - j] = param_->begin_[i];
|
||||
param_->size_[DIMENSION_4D - j] = param_->size_[i];
|
||||
}
|
||||
for (size_t i = 0; i < DIMENSION_4D - param_->param_length_; i++) {
|
||||
for (int i = 0; i < DIMENSION_4D - param_->param_length_; i++) {
|
||||
param_->begin_[i] = 0;
|
||||
param_->size_[i] = 1;
|
||||
}
|
||||
|
|
|
@ -37,8 +37,8 @@ class SoftmaxBaseCPUKernel : public LiteKernel {
|
|||
int Run() override { return 0; }
|
||||
|
||||
protected:
|
||||
int thread_count_;
|
||||
const lite::Context *ctx_;
|
||||
int thread_count_;
|
||||
SoftmaxParameter *softmax_param_;
|
||||
};
|
||||
} // namespace mindspore::kernel
|
||||
|
|
|
@ -39,8 +39,8 @@ class SplitBaseCPUKernel : public LiteKernel {
|
|||
int Run() override { return 0; }
|
||||
|
||||
protected:
|
||||
int thread_count_;
|
||||
const Context *ctx_;
|
||||
int thread_count_;
|
||||
int thread_n_stride_;
|
||||
int thread_n_num_;
|
||||
int num_unit_;
|
||||
|
|
|
@ -41,9 +41,9 @@ class SqueezeBaseCPUKernel : public LiteKernel {
|
|||
int Run() override { return 0; }
|
||||
|
||||
protected:
|
||||
int thread_count_;
|
||||
int *axis_;
|
||||
const Context *ctx_;
|
||||
int thread_count_;
|
||||
};
|
||||
} // namespace mindspore::kernel
|
||||
|
||||
|
|
|
@ -44,7 +44,7 @@ int AddNCPUKernel::ReSize() { return RET_OK; }
|
|||
|
||||
int AddNCPUKernel::AddNParallelRun(int thread_id) {
|
||||
int count_per_thread = UP_DIV(elements_num_, op_parameter_->thread_num_);
|
||||
int count = MSMIN(count_per_thread, elements_num_ - thread_id * count_per_thread);
|
||||
int count = MSMIN(count_per_thread, static_cast<int>(elements_num_ - thread_id * count_per_thread));
|
||||
auto stride = count_per_thread * thread_id;
|
||||
auto ret = ElementAdd(in1_addr_ + stride, in2_addr_ + stride, out_addr_ + stride, count);
|
||||
if (ret != NNACL_OK) {
|
||||
|
@ -64,9 +64,9 @@ int AddNCPUKernel::Run() {
|
|||
auto input0_data = reinterpret_cast<float *>(in_tensors_[0]->Data());
|
||||
auto input1_data = reinterpret_cast<float *>(in_tensors_[1]->Data());
|
||||
auto output_data = reinterpret_cast<float *>(out_tensors_[0]->Data());
|
||||
if (elements_num_ < op_parameter_->thread_num_) {
|
||||
if (static_cast<int>(elements_num_) < op_parameter_->thread_num_) {
|
||||
ElementAdd(input0_data, input1_data, output_data, elements_num_);
|
||||
for (int i = 2; i < in_tensors_.size(); ++i) {
|
||||
for (size_t i = 2; i < in_tensors_.size(); ++i) {
|
||||
ElementAdd(reinterpret_cast<float *>(in_tensors_[i]->Data()), output_data, output_data, elements_num_);
|
||||
}
|
||||
return RET_OK;
|
||||
|
|
|
@ -36,7 +36,7 @@ int ArithmeticSelfCPUKernel::Init() {
|
|||
|
||||
int ArithmeticSelfCPUKernel::ReSize() {
|
||||
data_size_ = in_tensors_[0]->ElementsNum();
|
||||
thread_sz_count_ = MSMIN(thread_count_, data_size_);
|
||||
thread_sz_count_ = MSMIN(thread_count_, static_cast<int>(data_size_));
|
||||
thread_sz_stride_ = UP_DIV(data_size_, thread_sz_count_);
|
||||
return RET_OK;
|
||||
}
|
||||
|
@ -52,7 +52,7 @@ int ArithmeticSelfRuns(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
|
|||
}
|
||||
|
||||
int ArithmeticSelfCPUKernel::DoArithmeticSelf(int task_id) {
|
||||
int size = MSMIN(thread_sz_stride_, data_size_ - task_id * thread_sz_stride_);
|
||||
int size = MSMIN(thread_sz_stride_, static_cast<int>(data_size_ - task_id * thread_sz_stride_));
|
||||
if (size <= 0) {
|
||||
return RET_OK;
|
||||
}
|
||||
|
|
|
@ -97,13 +97,13 @@ class ArithmeticSelfCPUKernel : public LiteKernel {
|
|||
int DoArithmeticSelf(int task_id);
|
||||
|
||||
private:
|
||||
int thread_count_;
|
||||
int thread_sz_count_;
|
||||
int thread_sz_stride_;
|
||||
size_t data_size_;
|
||||
ArithmeticSelfParameter *arithmeticSelfParameter_;
|
||||
ArithmeticSelfRun arithmeticSelf_run_;
|
||||
const Context *ctx_;
|
||||
int thread_count_;
|
||||
float *in_ptr_;
|
||||
float *out_ptr_;
|
||||
};
|
||||
|
|
|
@ -79,7 +79,7 @@ int BatchnormCPUKernel::ReSize() {
|
|||
auto n_dim = input_shapes.size();
|
||||
batchnorm_param_->channel_ = input_shapes[n_dim - 1];
|
||||
batchnorm_param_->unit_ = 1;
|
||||
for (int i = 0; i < n_dim - 1; i++) {
|
||||
for (size_t i = 0; i < n_dim - 1; i++) {
|
||||
batchnorm_param_->unit_ *= input_shapes[i];
|
||||
}
|
||||
batchnorm_param_->op_parameter_.thread_num_ =
|
||||
|
|
|
@ -32,7 +32,7 @@ int BiasCPUKernel::ReSize() {
|
|||
auto dims = in_tensors_[0]->shape();
|
||||
MS_ASSERT(dims.size() <= 5);
|
||||
bias_param_->ndim_ = dims.size();
|
||||
for (int i = 0; i < bias_param_->ndim_; i++) {
|
||||
for (size_t i = 0; i < bias_param_->ndim_; i++) {
|
||||
bias_param_->in_shape0_[i] = dims[i];
|
||||
bias_param_->in_shape1_[i] = 1;
|
||||
bias_param_->out_shape_[i] = dims[i];
|
||||
|
|
|
@ -43,8 +43,8 @@ class CaffePReluCPUKernel : public LiteKernel {
|
|||
int DoExcute(int task_id);
|
||||
|
||||
protected:
|
||||
int thread_count_;
|
||||
const Context *ctx_;
|
||||
int thread_count_;
|
||||
CaffePReluParameter *prelu_param_;
|
||||
|
||||
private:
|
||||
|
|
|
@ -52,7 +52,7 @@ int CastCPUKernel::ReSize() {
|
|||
if (data_num_ == 0) {
|
||||
return RET_OK;
|
||||
}
|
||||
op_parameter_->thread_num_ = MSMIN(op_parameter_->thread_num_, data_num_);
|
||||
op_parameter_->thread_num_ = MSMIN(op_parameter_->thread_num_, static_cast<int>(data_num_));
|
||||
stride_ = UP_DIV(data_num_, op_parameter_->thread_num_);
|
||||
return RET_OK;
|
||||
}
|
||||
|
|
|
@ -41,7 +41,10 @@ int EluCPUKernel::ReSize() {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int EluCPUKernel::DoExcute(int task_id) { Elu(input_addr, output_addr, elu_parameter_, task_id); }
|
||||
int EluCPUKernel::DoExcute(int task_id) {
|
||||
Elu(input_addr, output_addr, elu_parameter_, task_id);
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int EluRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
|
||||
auto EluData = reinterpret_cast<EluCPUKernel *>(cdata);
|
||||
|
|
|
@ -36,8 +36,8 @@ class EluCPUKernel : public LiteKernel {
|
|||
int DoExcute(int task_id);
|
||||
|
||||
protected:
|
||||
int thread_count_;
|
||||
const lite::Context *ctx_;
|
||||
int thread_count_;
|
||||
EluParameter *elu_parameter_;
|
||||
|
||||
private:
|
||||
|
|
|
@ -40,12 +40,12 @@ int EmbeddingLookupCPUKernel::ReSize() {
|
|||
|
||||
embedding_lookup_parameter_->layer_size_ = 1;
|
||||
auto in_shape = in_tensors_.front()->shape();
|
||||
for (int i = 1; i < in_shape.size(); ++i) {
|
||||
for (size_t i = 1; i < in_shape.size(); ++i) {
|
||||
embedding_lookup_parameter_->layer_size_ *= in_shape[i];
|
||||
}
|
||||
|
||||
embedding_lookup_parameter_->layer_num_ = 0;
|
||||
for (int i = 0; i < in_tensors_.size() - 1; ++i) {
|
||||
for (size_t i = 0; i < in_tensors_.size() - 1; ++i) {
|
||||
embedding_lookup_parameter_->layer_num_ += in_tensors_[i]->shape()[0];
|
||||
}
|
||||
|
||||
|
@ -94,7 +94,7 @@ int EmbeddingLookupCPUKernel::Run() {
|
|||
}
|
||||
|
||||
int dest_loc = 0;
|
||||
for (int i = 0; i < in_tensors_.size() - 1; i++) {
|
||||
for (size_t i = 0; i < in_tensors_.size() - 1; i++) {
|
||||
auto input_t = reinterpret_cast<float *>(in_tensors_.at(i)->Data());
|
||||
memcpy(input_addr_ + dest_loc, input_t, sizeof(float) * in_tensors_.at(i)->ElementsNum());
|
||||
dest_loc += in_tensors_.at(i)->ElementsNum();
|
||||
|
|
|
@ -43,8 +43,8 @@ class EmbeddingLookupCPUKernel : public LiteKernel {
|
|||
int DoExcute(int task_id);
|
||||
|
||||
protected:
|
||||
int thread_count_;
|
||||
const lite::Context *ctx_;
|
||||
int thread_count_;
|
||||
EmbeddingLookupParameter *embedding_lookup_parameter_;
|
||||
|
||||
private:
|
||||
|
|
|
@ -37,13 +37,13 @@ int ExpandDimsCPUKernel::Init() {
|
|||
|
||||
int ExpandDimsCPUKernel::ReSize() {
|
||||
data_size_ = in_tensors_.at(0)->ElementsNum();
|
||||
thread_sz_count_ = MSMIN(thread_count_, data_size_);
|
||||
thread_sz_count_ = MSMIN(thread_count_, static_cast<int>(data_size_));
|
||||
thread_sz_stride_ = UP_DIV(data_size_, thread_sz_count_);
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int ExpandDimsCPUKernel::DoExpandDims(int task_id) {
|
||||
size_t size = MSMIN(thread_sz_stride_, data_size_ - task_id * thread_sz_stride_);
|
||||
size_t size = MSMIN(thread_sz_stride_, static_cast<int>(data_size_ - task_id * thread_sz_stride_));
|
||||
if (size == 0) {
|
||||
return RET_OK;
|
||||
}
|
||||
|
|
|
@ -41,13 +41,13 @@ class ExpandDimsCPUKernel : public LiteKernel {
|
|||
int DoExpandDims(int task_id);
|
||||
|
||||
private:
|
||||
int thread_count_;
|
||||
int thread_sz_count_;
|
||||
int thread_sz_stride_;
|
||||
size_t data_size_;
|
||||
float *in_ptr_;
|
||||
float *out_ptr_;
|
||||
const Context *ctx_;
|
||||
int thread_count_;
|
||||
};
|
||||
} // namespace mindspore::kernel
|
||||
|
||||
|
|
|
@ -39,13 +39,13 @@ class FillCPUKernel : public LiteKernel {
|
|||
int DoFill(int task_id);
|
||||
|
||||
private:
|
||||
int thread_count_;
|
||||
int thread_sz_count_;
|
||||
int thread_sz_stride_;
|
||||
int data_size_;
|
||||
float src_data_;
|
||||
float *out_ptr_;
|
||||
const Context *ctx_;
|
||||
int thread_count_;
|
||||
};
|
||||
} // namespace mindspore::kernel
|
||||
|
||||
|
|
|
@ -37,7 +37,7 @@ int FlattenCPUKernel::Init() {
|
|||
int FlattenCPUKernel::ReSize() {
|
||||
auto output_shape = out_tensors_[0]->shape();
|
||||
flatten_param_->size = sizeof(float);
|
||||
for (int i = 0; i < output_shape.size(); i++) {
|
||||
for (size_t i = 0; i < output_shape.size(); i++) {
|
||||
flatten_param_->size *= output_shape[i];
|
||||
}
|
||||
return RET_OK;
|
||||
|
|
|
@ -98,7 +98,7 @@ int FusedBatchnormCPUKernel::ReSize() {
|
|||
auto n_dim = input_shapes.size();
|
||||
batchnorm_param_->channel_ = input_shapes[n_dim - 1];
|
||||
batchnorm_param_->unit_ = 1;
|
||||
for (int i = 0; i < n_dim - 1; i++) {
|
||||
for (size_t i = 0; i < n_dim - 1; i++) {
|
||||
batchnorm_param_->unit_ *= input_shapes[i];
|
||||
}
|
||||
batchnorm_param_->op_parameter_.thread_num_ =
|
||||
|
|
|
@ -57,7 +57,7 @@ int GatherCPUKernel::DoGather(int task_id) {
|
|||
int indices_element_size = indices_tensor->ElementsNum();
|
||||
|
||||
const int limit = in_shape[axis_];
|
||||
for (size_t i = 0; i < indices_element_size; ++i) {
|
||||
for (int i = 0; i < indices_element_size; ++i) {
|
||||
if (indices_ptr[i] >= limit) {
|
||||
MS_LOG(ERROR) << " indice data: " << indices_ptr[i] << " is not in [ 0, " << limit - 1 << " ]";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -41,7 +41,6 @@ class GatherNdCPUKernel : public LiteKernel {
|
|||
int DoGatherNd(int task_id);
|
||||
|
||||
private:
|
||||
int thread_count_;
|
||||
int thread_sz_count_;
|
||||
int thread_sz_stride_;
|
||||
int count_;
|
||||
|
@ -50,6 +49,7 @@ class GatherNdCPUKernel : public LiteKernel {
|
|||
float *in_ptr_;
|
||||
float *out_ptr_;
|
||||
const Context *ctx_;
|
||||
int thread_count_;
|
||||
};
|
||||
} // namespace mindspore::kernel
|
||||
|
||||
|
|
|
@ -59,7 +59,7 @@ int MatmulCPUKernel::ReSize() {
|
|||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < a_shape.size() - 2; ++i) {
|
||||
for (size_t i = 0; i < a_shape.size() - 2; ++i) {
|
||||
batch *= a_shape[i];
|
||||
}
|
||||
params_->batch = batch;
|
||||
|
|
|
@ -62,7 +62,7 @@ int PadCPUKernel::ReSize() {
|
|||
return RET_ERROR;
|
||||
}
|
||||
|
||||
for (int i = 0; i < rank; i++) {
|
||||
for (size_t i = 0; i < rank; i++) {
|
||||
in_[DEFAULT_PAD_NDIMS - rank + i] = input->shape()[i];
|
||||
out_[DEFAULT_PAD_NDIMS - rank + i] = output->shape()[i];
|
||||
}
|
||||
|
|
|
@ -43,8 +43,8 @@ class PReluCPUKernel : public LiteKernel {
|
|||
int DoExcute(int task_id);
|
||||
|
||||
protected:
|
||||
int thread_count_;
|
||||
const Context *ctx_;
|
||||
int thread_count_;
|
||||
PReluParameter *prelu_param_;
|
||||
|
||||
private:
|
||||
|
|
|
@ -113,7 +113,7 @@ int ReduceCPUKernel::Run() {
|
|||
}
|
||||
tmp_shape_ = in_tensors_.at(0)->shape();
|
||||
src_data_ = static_cast<float *>(in_tensors_.at(0)->Data());
|
||||
for (int i = 0; i < data_buffers_.size(); ++i) {
|
||||
for (size_t i = 0; i < data_buffers_.size(); ++i) {
|
||||
dst_data_ = data_buffers_[i];
|
||||
int axis = axes_[i];
|
||||
outer_size_ = 1;
|
||||
|
@ -167,8 +167,8 @@ int ReduceCPUKernel::MallocTmpBuffer() {
|
|||
for (auto i = 0; i < num_axes_ - 1; i++) {
|
||||
int axis = axes_[i];
|
||||
size_t size = 1;
|
||||
for (auto j = 0; j < input_shape.size(); j++) {
|
||||
if (static_cast<size_t>(axis) != j) {
|
||||
for (size_t j = 0; j < input_shape.size(); j++) {
|
||||
if (axis != static_cast<int>(j)) {
|
||||
size *= input_shape[j];
|
||||
}
|
||||
}
|
||||
|
|
|
@ -36,7 +36,7 @@ class ReduceCPUKernel : public ReduceBaseCPUKernel {
|
|||
const mindspore::lite::PrimitiveC *primitive)
|
||||
: ReduceBaseCPUKernel(param, inputs, outputs, ctx, primitive) {}
|
||||
~ReduceCPUKernel() {
|
||||
for (auto i = 0; i < data_buffers_.size(); i++) {
|
||||
for (size_t i = 0; i < data_buffers_.size(); i++) {
|
||||
float *buffer = data_buffers_[i];
|
||||
if (buffer != nullptr) {
|
||||
free(buffer);
|
||||
|
|
|
@ -30,8 +30,8 @@ using mindspore::schema::PrimitiveType_Reverse;
|
|||
namespace mindspore::kernel {
|
||||
|
||||
int ReverseCPUKernel::Stride(int index) {
|
||||
int i, stride = 1;
|
||||
for (i = index + 1; i < in_tensors_[0]->shape().size(); ++i) {
|
||||
int stride = 1;
|
||||
for (size_t i = index + 1; i < in_tensors_[0]->shape().size(); ++i) {
|
||||
stride *= in_tensors_[0]->shape()[i];
|
||||
}
|
||||
return stride;
|
||||
|
@ -44,7 +44,7 @@ int ReverseCPUKernel::ReSize() {
|
|||
|
||||
auto *param = reinterpret_cast<ReverseParameter *>(op_parameter_);
|
||||
auto input_shape = in_tensors_[0]->shape();
|
||||
if (param->num_axis_ > input_shape.size()) {
|
||||
if (param->num_axis_ > static_cast<int>(input_shape.size())) {
|
||||
MS_LOG(ERROR) << "Reverse dims : " << param->num_axis_
|
||||
<< "is greater than input shape size :" << input_shape.size();
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -46,7 +46,6 @@ class ReverseCPUKernel : public LiteKernel {
|
|||
int DoReverse(int task_id);
|
||||
|
||||
private:
|
||||
int thread_count_;
|
||||
int thread_sz_count_;
|
||||
int thread_sz_stride_;
|
||||
int data_size_;
|
||||
|
@ -54,6 +53,7 @@ class ReverseCPUKernel : public LiteKernel {
|
|||
int inCount_[REVERSE_STRIDE_MAX_SIZE];
|
||||
int outCount_[REVERSE_STRIDE_MAX_SIZE];
|
||||
const Context *ctx_;
|
||||
int thread_count_;
|
||||
int *tmp_ = nullptr;
|
||||
float *in_ptr_;
|
||||
float *out_ptr_;
|
||||
|
|
|
@ -45,7 +45,7 @@ int ReverseSequenceCPUKernel::CalcCountPreAxis(const std::vector<int> shape, int
|
|||
}
|
||||
int ReverseSequenceCPUKernel::CalcCountAfterAxis(const std::vector<int> shape, int axis) {
|
||||
int count = 1;
|
||||
for (int i = axis + 1; i < shape.size(); ++i) {
|
||||
for (size_t i = axis + 1; i < shape.size(); ++i) {
|
||||
count *= shape[i];
|
||||
}
|
||||
return count;
|
||||
|
@ -53,10 +53,8 @@ int ReverseSequenceCPUKernel::CalcCountAfterAxis(const std::vector<int> shape, i
|
|||
|
||||
int ReverseSequenceCPUKernel::ReSize() {
|
||||
auto input0 = in_tensors_.at(0);
|
||||
auto input1 = in_tensors_.at(1);
|
||||
auto output = out_tensors_.at(0);
|
||||
MS_ASSERT(input0 != nullptr);
|
||||
MS_ASSERT(input1 != nullptr);
|
||||
MS_ASSERT(output != nullptr);
|
||||
|
||||
auto para = reinterpret_cast<ReverseSequenceParameter *>(op_parameter_);
|
||||
|
|
|
@ -91,14 +91,14 @@ int ScaleCPUKernel::InitParameter() {
|
|||
for (int i = 0; i < scale_param_->axis_; i++) {
|
||||
scale_param_->outer_size_ *= in_shape[i];
|
||||
}
|
||||
for (int i = 0; i < scale_shape.size(); i++) {
|
||||
for (size_t i = 0; i < scale_shape.size(); i++) {
|
||||
if (in_shape[i + scale_param_->axis_] != scale_shape[i]) {
|
||||
MS_LOG(ERROR) << "Scale tensor shape is incorrect.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
scale_param_->axis_size_ *= in_shape[i + scale_param_->axis_];
|
||||
}
|
||||
for (int i = scale_param_->axis_ + scale_shape.size(); i < in_shape.size(); i++) {
|
||||
for (size_t i = scale_param_->axis_ + scale_shape.size(); i < in_shape.size(); i++) {
|
||||
scale_param_->inner_size_ *= in_shape[i];
|
||||
}
|
||||
return RET_OK;
|
||||
|
|
|
@ -68,7 +68,7 @@ int ScatterNDCPUKernel::ReSize() {
|
|||
// check consistency of the shape indices and shape
|
||||
auto update_rank = static_cast<int>(update->shape().size());
|
||||
auto indices_shape = indices->shape();
|
||||
if (update_rank != indices->shape().size() - 1 + shape_rank - indice_unit_rank) {
|
||||
if (update_rank != static_cast<int>(indices->shape().size() - 1 + shape_rank - indice_unit_rank)) {
|
||||
MS_LOG(ERROR) << "Update, shape rank and indices rank inconsistent.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
|
|
@ -51,7 +51,7 @@ int ShapeCPUKernel::Run() {
|
|||
return RET_ERROR;
|
||||
}
|
||||
|
||||
for (int i = 0; i < in_tensor->shape().size(); i++) {
|
||||
for (size_t i = 0; i < in_tensor->shape().size(); i++) {
|
||||
reinterpret_cast<int *>(out_tensor->Data())[i] = in_tensor->shape()[i];
|
||||
}
|
||||
|
||||
|
|
|
@ -42,7 +42,7 @@ int SliceLaunch(int thread_id, LiteParallelGroupEnv *penv, void *cdata) {
|
|||
int SliceCPUKernel::ReSize() {
|
||||
auto *param = reinterpret_cast<SliceParameter *>(op_parameter_);
|
||||
auto input_shape = in_tensors_[0]->shape();
|
||||
if (input_shape.size() != param->param_length_) {
|
||||
if (static_cast<int>(input_shape.size()) != param->param_length_) {
|
||||
MS_LOG(ERROR) << "Input begin's lenth " << param->param_length_ << "is not equal to input shape size "
|
||||
<< input_shape.size();
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -42,8 +42,8 @@ class SparseToDenseCPUKernel : public LiteKernel {
|
|||
int DoExcute(int task_id);
|
||||
|
||||
protected:
|
||||
int thread_count_;
|
||||
const Context *ctx_;
|
||||
int thread_count_;
|
||||
SparseToDenseParameter *s2d_param_;
|
||||
|
||||
private:
|
||||
|
|
|
@ -38,7 +38,7 @@ int TopKCPUKernel::ReSize() {
|
|||
TopkParameter *parameter = reinterpret_cast<TopkParameter *>(op_parameter_);
|
||||
parameter->last_dim_size_ = input->shape()[input->shape().size() - 1];
|
||||
parameter->loop_num_ = 1;
|
||||
for (int i = 0; i < input->shape().size() - 1; ++i) {
|
||||
for (size_t i = 0; i < input->shape().size() - 1; ++i) {
|
||||
parameter->loop_num_ *= input->shape()[i];
|
||||
}
|
||||
return RET_OK;
|
||||
|
|
|
@ -42,10 +42,10 @@ int UnstackCPUKernel::ReSize() {
|
|||
if (para->axis_ < 0) {
|
||||
para->axis_ += shape_size;
|
||||
}
|
||||
for (size_t i = 0; i < shape_size; i++) {
|
||||
if (i < para->axis_) {
|
||||
for (size_t i = 0; i < static_cast<size_t>(shape_size); i++) {
|
||||
if (static_cast<int>(i) < para->axis_) {
|
||||
para->pre_dims_ *= input->DimensionSize(i);
|
||||
} else if (i > para->axis_) {
|
||||
} else if (static_cast<int>(i) > para->axis_) {
|
||||
para->after_dims_ *= input->DimensionSize(i);
|
||||
} else {
|
||||
para->axis_dim_ = input->DimensionSize(i);
|
||||
|
|
|
@ -42,8 +42,8 @@ class WhereCPUKernel : public LiteKernel {
|
|||
int DoExcute(int task_id);
|
||||
|
||||
protected:
|
||||
int thread_count_;
|
||||
const Context *ctx_;
|
||||
int thread_count_;
|
||||
WhereParameter *where_param_;
|
||||
|
||||
private:
|
||||
|
|
|
@ -60,7 +60,7 @@ int ArithmeticSelfInt8CPUKernel::Init() {
|
|||
|
||||
int ArithmeticSelfInt8CPUKernel::ReSize() {
|
||||
data_size_ = in_tensors_[0]->ElementsNum();
|
||||
thread_sz_count_ = MSMIN(thread_count_, data_size_);
|
||||
thread_sz_count_ = MSMIN(thread_count_, static_cast<int>(data_size_));
|
||||
thread_sz_stride_ = UP_DIV(data_size_, thread_sz_count_);
|
||||
return RET_OK;
|
||||
}
|
||||
|
@ -76,7 +76,7 @@ int ArithmeticSelfInt8Runs(int task_id, LiteParallelGroupEnv *penv, void *cdata)
|
|||
}
|
||||
|
||||
int ArithmeticSelfInt8CPUKernel::DoArithmeticSelf(int task_id) {
|
||||
int size = MSMIN(thread_sz_stride_, data_size_ - task_id * thread_sz_stride_);
|
||||
int size = MSMIN(thread_sz_stride_, static_cast<int>(data_size_ - task_id * thread_sz_stride_));
|
||||
if (size <= 0) {
|
||||
return RET_OK;
|
||||
}
|
||||
|
|
|
@ -93,13 +93,13 @@ class ArithmeticSelfInt8CPUKernel : public LiteKernel {
|
|||
int DoArithmeticSelf(int task_id);
|
||||
|
||||
private:
|
||||
int thread_count_;
|
||||
int thread_sz_count_;
|
||||
int thread_sz_stride_;
|
||||
size_t data_size_;
|
||||
ArithmeticSelfParameter *para_;
|
||||
ArithmeticSelfInt8Run arithmeticSelf_run_;
|
||||
const Context *ctx_;
|
||||
int thread_count_;
|
||||
int8_t *in_ptr_;
|
||||
int8_t *out_ptr_;
|
||||
};
|
||||
|
|
|
@ -143,7 +143,7 @@ int BatchnormInt8CPUKernel::Init() {
|
|||
auto n_dim = input_shapes.size();
|
||||
batchnorm_param_->channel_ = input_shapes[n_dim - 1];
|
||||
batchnorm_param_->units_ = 1;
|
||||
for (int i = 0; i < n_dim - 1; i++) {
|
||||
for (size_t i = 0; i < n_dim - 1; i++) {
|
||||
batchnorm_param_->units_ *= input_shapes[i];
|
||||
}
|
||||
batchnorm_param_->op_parameter_.thread_num_ =
|
||||
|
@ -169,7 +169,7 @@ int BatchnormInt8CPUKernel::Init() {
|
|||
int BatchnormInt8CPUKernel::ReSize() {
|
||||
auto input_shapes = in_tensors_[0]->shape();
|
||||
batchnorm_param_->unit_ = 1;
|
||||
for (int i = 0; i < input_shapes.size() - 1; i++) {
|
||||
for (size_t i = 0; i < input_shapes.size() - 1; i++) {
|
||||
batchnorm_param_->unit_ *= input_shapes[i];
|
||||
}
|
||||
return RET_OK;
|
||||
|
|
|
@ -36,7 +36,7 @@ int BiasAddInt8CPUKernel::ReSize() {
|
|||
auto bias_param = reinterpret_cast<ArithmeticParameter *>(op_parameter_);
|
||||
auto dims = in_tensors_[0]->shape();
|
||||
bias_param->ndim_ = dims.size();
|
||||
for (int i = 0; i < bias_param->ndim_; i++) {
|
||||
for (size_t i = 0; i < bias_param->ndim_; i++) {
|
||||
bias_param->in_shape0_[i] = dims[i];
|
||||
bias_param->in_shape1_[i] = 1;
|
||||
bias_param->out_shape_[i] = dims[i];
|
||||
|
|
|
@ -65,9 +65,6 @@ int ConcatInt8CPUKernel::ReSize() {
|
|||
if (ret != RET_OK) {
|
||||
return ret;
|
||||
}
|
||||
if (concat_param_->input_shapes_ != nullptr) {
|
||||
// free(concat_param_->input_shapes_);
|
||||
}
|
||||
auto input_num = in_tensors_.size();
|
||||
concat_param_->input_num_ = input_num;
|
||||
concat_param_->input_shapes_ = reinterpret_cast<const int **>(malloc(sizeof(int *) * input_num));
|
||||
|
@ -82,7 +79,7 @@ int ConcatInt8CPUKernel::ReSize() {
|
|||
|
||||
int64_t after_axis_size = 1;
|
||||
auto output_tensor = out_tensors_.at(kOutputIndex);
|
||||
int output_dim = output_tensor->shape().size();
|
||||
size_t output_dim = output_tensor->shape().size();
|
||||
concat_param_->output_shapes_ = output_tensor->shape().data();
|
||||
for (size_t i = axis_ + 1; i < output_dim; i++) {
|
||||
after_axis_size *= concat_param_->output_shapes_[i];
|
||||
|
@ -102,7 +99,7 @@ int ConcatInt8CPUKernel::Run() {
|
|||
count_unit_ = thread_count_ > 1 ? UP_DIV(before_axis_size, thread_count_) : before_axis_size;
|
||||
concat_param_->count_unit_ = count_unit_;
|
||||
|
||||
for (size_t i = 0; i < input_num; i++) {
|
||||
for (int i = 0; i < input_num; i++) {
|
||||
input_data_[i] = static_cast<int8_t *>(in_tensors_.at(i)->Data());
|
||||
}
|
||||
output_data_ = reinterpret_cast<int8_t *>(out_tensors_.at(0)->Data());
|
||||
|
|
|
@ -122,7 +122,7 @@ int DeConvInt8CPUKernel::InitParam() {
|
|||
|
||||
/* optimize normal -> same data layout */
|
||||
input_trans_func_ = RowMajor2Row16x4MajorInt8;
|
||||
size_t oc4 = UP_DIV(conv_param_->output_channel_, C4NUM);
|
||||
int oc4 = UP_DIV(conv_param_->output_channel_, C4NUM);
|
||||
thread_count_ = MSMIN(op_parameter_->thread_num_, oc4);
|
||||
thread_stride_ = UP_DIV(oc4, thread_count_);
|
||||
return RET_OK;
|
||||
|
|
|
@ -38,7 +38,7 @@ int MatmulInt8CPUKernel::ReSize() {
|
|||
int batch = 1;
|
||||
auto x_shape = in_tensors_[0]->shape();
|
||||
auto o_shape = out_tensors_[0]->shape();
|
||||
for (int i = 0; i < x_shape.size() - 2; ++i) {
|
||||
for (size_t i = 0; i < x_shape.size() - 2; ++i) {
|
||||
batch *= x_shape[i];
|
||||
}
|
||||
params_->batch = batch;
|
||||
|
|
|
@ -57,6 +57,7 @@ int PreluInt8CPUKernel::ReSize() {
|
|||
quant_prelu_parm_->element_num = in_tensors_[0]->Size();
|
||||
quant_prelu_parm_->in_shape_ = input_tensor->shape().data();
|
||||
quant_prelu_parm_->out_shape_ = out_tensor->shape().data();
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int PreluInt8CPUKernel::Run() {
|
||||
|
|
|
@ -184,8 +184,8 @@ int ReduceInt8CPUKernel::MallocTmpBuffer() {
|
|||
for (auto i = 0; i < num_axes_ - 1; i++) {
|
||||
int axis = axes_[i];
|
||||
size_t size = 1;
|
||||
for (auto j = 0; j < input_shape.size(); j++) {
|
||||
if (static_cast<size_t>(axis) != j) {
|
||||
for (size_t j = 0; j < input_shape.size(); j++) {
|
||||
if (axis != static_cast<int>(j)) {
|
||||
size *= input_shape[j];
|
||||
}
|
||||
}
|
||||
|
@ -258,7 +258,7 @@ int ReduceInt8CPUKernel::Run() {
|
|||
tmp_shape_ = in_tensors_.at(0)->shape();
|
||||
src_data_ = begin_src_data_;
|
||||
|
||||
for (int i = 0; i < data_buffers_.size(); ++i) {
|
||||
for (size_t i = 0; i < data_buffers_.size(); ++i) {
|
||||
if (mode_ == static_cast<int>(schema::ReduceMode_ReduceMean)) {
|
||||
quant_arg_.mean_multiplier_ = mean_multipliers_[i]->multiplier_;
|
||||
quant_arg_.mean_left_shift_ = mean_multipliers_[i]->left_shift_;
|
||||
|
|
|
@ -133,7 +133,7 @@ int SqueezeInt8CPUKernel::Run() {
|
|||
auto input_type = in_tensors_[i]->data_type();
|
||||
if (input_type == kNumberTypeUInt8) {
|
||||
uint8_t *input_tmp = reinterpret_cast<uint8_t *>(in_tensors_[i]->Data());
|
||||
for (size_t j = 0; j < input_size; j++) {
|
||||
for (int j = 0; j < input_size; j++) {
|
||||
inputs_array[i][j] = (int8_t)(input_tmp[j] - 128);
|
||||
}
|
||||
for (size_t j = 0; j < input_dim; j++) {
|
||||
|
@ -148,12 +148,12 @@ int SqueezeInt8CPUKernel::Run() {
|
|||
auto output_type = out_tensors_[0]->data_type();
|
||||
if (output_type == kNumberTypeUInt8) {
|
||||
auto output_size = quant_Squeeze_parm_->output_size_;
|
||||
for (size_t i = 0; i < output_size; i++) {
|
||||
for (int i = 0; i < output_size; i++) {
|
||||
output_addr[i] = (uint8_t)(output_addr[i] + 128);
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < input_dim; i++) {
|
||||
for (size_t i = 0; i < input_dim; i++) {
|
||||
free(*(inputs_array + i));
|
||||
}
|
||||
|
||||
|
|
|
@ -40,7 +40,7 @@ int TopKInt8CPUKernel::ReSize() {
|
|||
lite::tensor::Tensor *input = in_tensors_.at(0);
|
||||
parameter->last_dim_size_ = input->shape()[input->shape().size() - 1];
|
||||
parameter->loop_num_ = 1;
|
||||
for (int i = 0; i < input->shape().size() - 1; ++i) {
|
||||
for (size_t i = 0; i < input->shape().size() - 1; ++i) {
|
||||
parameter->loop_num_ *= input->shape()[i];
|
||||
}
|
||||
return RET_OK;
|
||||
|
|
|
@ -44,13 +44,13 @@ class Unsqueezeint8CPUKernel : public LiteKernel {
|
|||
private:
|
||||
UnSqueezeQuantArg *quant_Unsqueeze_parm_;
|
||||
UnSqueezeParameter *Unsq_para_;
|
||||
int thread_count_;
|
||||
int thread_sz_count_;
|
||||
int thread_sz_stride_;
|
||||
int data_size_;
|
||||
float *in_ptr_;
|
||||
float *out_ptr_;
|
||||
const Context *ctx_;
|
||||
int thread_count_;
|
||||
};
|
||||
} // namespace mindspore::kernel
|
||||
|
||||
|
|
|
@ -61,7 +61,7 @@ void ComputeStrides(int *shape, int *strides, int ndim) {
|
|||
}
|
||||
|
||||
void CalcMultiplesAndStrides(ArithmeticParameter *param) {
|
||||
for (auto i = 0; i < param->ndim_; i++) {
|
||||
for (size_t i = 0; i < param->ndim_; i++) {
|
||||
param->multiples0_[i] = param->out_shape_[i] / param->in_shape0_[i];
|
||||
param->multiples1_[i] = param->out_shape_[i] / param->in_shape1_[i];
|
||||
}
|
||||
|
|
|
@ -50,7 +50,6 @@ void BatchToSpaceForNHWC(const void *input, void *output, const int *in_shape, i
|
|||
const int *crops, int data_size) {
|
||||
int block_h = block[0];
|
||||
int block_w = block[1];
|
||||
int in_n = in_shape[0];
|
||||
int in_h = in_shape[1];
|
||||
int in_w = in_shape[2];
|
||||
int in_c = in_shape[3];
|
||||
|
|
|
@ -69,7 +69,7 @@ int DeConvPostFp32C8x8(const float *src, float *tmp, const float *bias, float *d
|
|||
int src_index = ih * src_ih_stride + iw * src_iw_stride + kh * src_kh_stride + kw * src_kw_stride;
|
||||
int dst_index = oh * dst_oh_stride + ow * dst_ow_stride + kh * dst_kh_stride + kw * dst_kw_stride;
|
||||
float *tmp_dst = dst_ptr + dst_index;
|
||||
float *tmp_src = src_ptr + src_index;
|
||||
const float *tmp_src = src_ptr + src_index;
|
||||
#ifdef ENABLE_ARM64
|
||||
asm volatile(
|
||||
"mov x0, %[tmp_src] \n"
|
||||
|
|
|
@ -35,8 +35,6 @@ int ROIPooling(float *in_ptr, float *out_ptr, float *roi, int tid, ROIPoolingPar
|
|||
int scale = param->scale_;
|
||||
int pooled_height = param->pooledH_;
|
||||
int pooled_width = param->pooledW_;
|
||||
int *in_strides = &(param->in_strides_);
|
||||
int *out_strides = &(param->out_strides_);
|
||||
int roi_stride = 5;
|
||||
int roi_ind_st = roi_st * roi_stride;
|
||||
float *max_c = malloc(channels_ * sizeof(float));
|
||||
|
@ -55,9 +53,8 @@ int ROIPooling(float *in_ptr, float *out_ptr, float *roi, int tid, ROIPoolingPar
|
|||
|
||||
float bin_size_h = (float)roi_height / (float)pooled_height;
|
||||
float bin_size_w = (float)roi_width / (float)pooled_width;
|
||||
float *batch_data = in_ptr + in_strides[kNHWC_N] * roi_batch_ind;
|
||||
float *batch_data = in_ptr + param->in_strides_[kNHWC_N] * roi_batch_ind;
|
||||
|
||||
int out_ind = i * out_strides[0];
|
||||
for (int ph = 0; ph < pooled_height; ++ph) {
|
||||
for (int pw = 0; pw < pooled_width; ++pw) {
|
||||
int hstart = (int)floorf(ph * bin_size_h); // block xi_1
|
||||
|
@ -76,17 +73,17 @@ int ROIPooling(float *in_ptr, float *out_ptr, float *roi, int tid, ROIPoolingPar
|
|||
max_c[j] = 0;
|
||||
}
|
||||
}
|
||||
int pooled_index = i * out_strides[0] + ph * out_strides[1] + pw * out_strides[2];
|
||||
int bd_index = hstart * in_strides[1];
|
||||
int pooled_index = i * param->out_strides_[0] + ph * param->out_strides_[1] + pw * param->out_strides_[2];
|
||||
int bd_index = hstart * param->in_strides_[1];
|
||||
for (int h = hstart; h < hend; ++h) {
|
||||
int wi = bd_index + wstart * in_strides[2];
|
||||
int wi = bd_index + wstart * param->in_strides_[2];
|
||||
for (int w = wstart; w < wend; ++w) {
|
||||
for (int c = 0; c < channels_; ++c) {
|
||||
max_c[c] = MSMAX(batch_data[wi + c], max_c[c]);
|
||||
}
|
||||
wi += in_strides[2];
|
||||
wi += param->in_strides_[2];
|
||||
} // in_w end;
|
||||
bd_index += in_strides[1];
|
||||
bd_index += param->in_strides_[1];
|
||||
} // in_h end
|
||||
for (int j = 0; j < channels_; ++j) {
|
||||
out_ptr[pooled_index + j] = max_c[j];
|
||||
|
|
|
@ -141,7 +141,7 @@ int SpaceToBatch(const float *input, float *output, SpaceToBatchParameter param,
|
|||
if (input == NULL || output == NULL) {
|
||||
return NNACL_NULL_PTR;
|
||||
}
|
||||
auto ret =
|
||||
int ret =
|
||||
SpaceToBatchForNHWC(input, output, param.padded_in_shape_, param.n_dims_, param.block_sizes_, h_start, h_end);
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -58,7 +58,6 @@ void BatchToSpaceForNHWCInt8(const int8_t *input, int8_t *output, const int *in_
|
|||
const int *crops, QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
|
||||
int block_h = block[0];
|
||||
int block_w = block[1];
|
||||
int in_n = in_shape[0];
|
||||
int in_h = in_shape[1];
|
||||
int in_w = in_shape[2];
|
||||
int in_c = in_shape[3];
|
||||
|
|
|
@ -31,7 +31,7 @@ void Crop(const int8_t *input, int8_t *output, int task_id, CropParameter *para)
|
|||
Crop3D(input, output, task_id, para);
|
||||
break;
|
||||
case 4:
|
||||
Crop4D(input, output, task_id, para);
|
||||
Int8Crop4D(input, output, task_id, para);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -266,7 +266,7 @@ int ReduceMinLastAxis(const int outer_size, const int inner_size, const int axis
|
|||
}
|
||||
int32_t tmp_scaled =
|
||||
RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(
|
||||
(tmp - quant->in_zp_) * (1 << (unsigned int)quant->in_out_left_shift_ + base_offset),
|
||||
(tmp - quant->in_zp_) * (1 << ((unsigned int)quant->in_out_left_shift_ + base_offset)),
|
||||
quant->in_out_multiplier_),
|
||||
quant->in_out_right_shift_ + base_offset);
|
||||
if (isAddOverflow(tmp_scaled, quant->out_zp_)) {
|
||||
|
|
|
@ -33,14 +33,13 @@ int ResizeBilinearInt8(const int8_t *input_data, int8_t *output_data, const int
|
|||
|
||||
int32_t new_height = output_shape[1];
|
||||
int32_t new_width = output_shape[2];
|
||||
int32_t height_scale, width_scale;
|
||||
int32_t height_scale = 0, width_scale = 0;
|
||||
ComputeScale(in_h, new_height, align_corners, &height_scale);
|
||||
ComputeScale(in_w, new_width, align_corners, &width_scale);
|
||||
|
||||
int n, h, w, c;
|
||||
for (n = 0; n < in_n; n++) {
|
||||
for (h = tid; h < new_height; h += thread_num) {
|
||||
// float actual_y = (float)h * height_scale;
|
||||
const int base_offset = 20;
|
||||
int scaled_actual_y;
|
||||
int bottom, top;
|
||||
|
@ -99,10 +98,10 @@ int ResizeNearestNeighborInt8Simple(const int8_t *input_data, int8_t *output_dat
|
|||
|
||||
for (batch = 0; batch < output_shape[0]; batch++) {
|
||||
for (y = tid; y < output_shape[1]; y += thread_num) {
|
||||
int input_y;
|
||||
int input_y = 0;
|
||||
ComputeNearestNeighborInt(y, in_h, new_height, align_corners, &input_y);
|
||||
for (x = 0; x < output_shape[2]; x++) {
|
||||
int input_x;
|
||||
int input_x = 0;
|
||||
ComputeNearestNeighborInt(x, in_w, new_width, align_corners, &input_x);
|
||||
int in_offset = offset(input_shape, batch, input_y, input_x, 0);
|
||||
int out_offset = offset(output_shape, batch, y, x, 0);
|
||||
|
@ -159,10 +158,10 @@ int ResizeNearestNeighborInt8(const int8_t *input_data, int8_t *output_data, con
|
|||
|
||||
for (batch = 0; batch < output_shape[0]; batch++) {
|
||||
for (y = tid; y < output_shape[1]; y += thread_num) {
|
||||
int input_y;
|
||||
int input_y = 0;
|
||||
ComputeNearestNeighborInt(y, in_h, new_height, align_corners, &input_y);
|
||||
for (x = 0; x < output_shape[2]; x++) {
|
||||
int input_x;
|
||||
int input_x = 0;
|
||||
ComputeNearestNeighborInt(x, in_w, new_width, align_corners, &input_x);
|
||||
for (c = 0; c < output_shape[3]; c++) {
|
||||
int in_offset = offset(input_shape, batch, input_y, input_x, c);
|
||||
|
|
|
@ -961,7 +961,7 @@ void PackNHWCToNCHWFp32(const void *src, void *dst, int batches, int plane, int
|
|||
#endif
|
||||
}
|
||||
for (; c < channel; c++) {
|
||||
float *src_ptr = src_batch + hw * channel + c;
|
||||
const float *src_ptr = src_batch + hw * channel + c;
|
||||
float *dst_ptr = dst_batch + c * plane + hw;
|
||||
for (size_t i = 0; i < C8NUM; i++) {
|
||||
dst_ptr[i] = src_ptr[i * channel];
|
||||
|
@ -969,7 +969,7 @@ void PackNHWCToNCHWFp32(const void *src, void *dst, int batches, int plane, int
|
|||
}
|
||||
}
|
||||
for (; hw < plane; hw++) {
|
||||
float *src_ptr = src_batch + hw * channel;
|
||||
const float *src_ptr = src_batch + hw * channel;
|
||||
float *dst_ptr = dst_batch + hw;
|
||||
for (size_t i = 0; i < channel; i++) {
|
||||
dst_ptr[i * plane] = src_ptr[i];
|
||||
|
@ -1023,10 +1023,10 @@ void PackDepthwiseInt8Input(const int8_t *src, int16_t *dst, const ConvParameter
|
|||
int unit = conv_param->input_h_ * conv_param->input_w_;
|
||||
|
||||
for (int b = 0; b < conv_param->input_batch_; b++) {
|
||||
int8_t *src_b = src + b * unit * conv_param->input_channel_;
|
||||
const int8_t *src_b = src + b * unit * conv_param->input_channel_;
|
||||
int16_t *dst_b = dst + b * unit * ic4 * C4NUM;
|
||||
for (int k = 0; k < unit; k++) {
|
||||
int8_t *src_k = src_b + k * conv_param->input_channel_;
|
||||
const int8_t *src_k = src_b + k * conv_param->input_channel_;
|
||||
int16_t *dst_k = dst_b + k * ic4 * C4NUM;
|
||||
for (int c = 0; c < conv_param->input_channel_; c++) {
|
||||
dst_k[c] = (int16_t)(src_k[c] - input_zp);
|
||||
|
@ -1044,10 +1044,10 @@ void PackDepthwiseInt8Weight(const int8_t *origin_weight, int16_t *packed_weight
|
|||
}
|
||||
int c4_block_num = c / C4NUM;
|
||||
int c4_block_rem = c % C4NUM;
|
||||
int8_t *src_c = origin_weight + c * unit;
|
||||
const int8_t *src_c = origin_weight + c * unit;
|
||||
int16_t *dst_c = packed_weight_ + c4_block_num * unit * C4NUM;
|
||||
for (int k = 0; k < unit; k++) {
|
||||
int8_t *src_kernel = src_c + k;
|
||||
const int8_t *src_kernel = src_c + k;
|
||||
int16_t *dst_kernel = dst_c + C4NUM * k + c4_block_rem;
|
||||
*dst_kernel = (int16_t)(src_kernel[0] - weight_zp);
|
||||
}
|
||||
|
|
|
@@ -30,14 +30,14 @@ void QuantizeMultiplierSmallerThanOne(double double_multiplier, int32_t *quantiz
if (quantized_multiplier == NULL || right_shift == NULL) {
return;
}
-int shift;
+int shift = 0;
QuantizeMultiplier(double_multiplier, quantized_multiplier, &shift);
*right_shift = -shift;
}

void QuantizeRoundParameter(double double_multiplier, int32_t *quantized_multiplier, int *left_shift,
int *right_shift) {
-int shift;
+int shift = 0;
QuantizeMultiplierSmallerThanOne(double_multiplier, quantized_multiplier, &shift);
shift = -shift;
if (shift < 0) {
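Initializing shift to 0 before handing its address to the quantization helpers is presumably defensive: the function shown above can return early on its NULL check without ever writing through the pointer, and reading an unwritten local is what -Wmaybe-uninitialized reports under -Werror. A hedged, stand-alone sketch of the idea with a made-up helper, not the real quantize.c API:

  #include <stddef.h>

  /* Hypothetical callee that may bail out before writing the result. */
  static void MaybeComputeShift(int *out) {
    if (out == NULL) {
      return;  /* early return: *out is never written */
    }
    *out = 7;
  }

  int ExampleUse(void) {
    int shift = 0;  /* initialized, so the value is defined even on the early-return path */
    MaybeComputeShift(&shift);
    return -shift;
  }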
@@ -913,7 +913,7 @@ void Conv3x3Int8FilterTransform(const int16_t *weight_data, int16_t *trans_weigh
int src_oc_offset = o * iC8 * C8NUM * kernel_plane;
int dst_oc_offset = oc4_block_num * C4NUM * iC8 * C8NUM * input_unit * input_unit + oc4_block_rem;
for (int i = 0; i < iC8; i++) {
-int16_t *src_ic8_ptr = weight_data + src_oc_offset + i * kernel_plane * C8NUM;
+const int16_t *src_ic8_ptr = weight_data + src_oc_offset + i * kernel_plane * C8NUM;
int16_t *dst_ic8_ptr = trans_weight + dst_oc_offset + i * C4NUM * C8NUM;
#ifdef ENABLE_ARM
int16x8_t g00 = vld1q_s16(src_ic8_ptr);
@@ -1107,7 +1107,7 @@ void Conv3x3Int8FilterTransform(const int16_t *weight_data, int16_t *trans_weigh
dst_ic8_ptr[28 + 15 * dst_step] = m33[7];
#else
for (int j = 0; j < C8NUM; j++) {
-int16_t *local_ptr = src_ic8_ptr + j;
+const int16_t *local_ptr = src_ic8_ptr + j;
int16_t dst00 = local_ptr[0] * 2;
int16_t dst01 = (local_ptr + 8)[0] * 2;
int16_t dst02 = (local_ptr + 16)[0] * 2;
@@ -29,6 +29,7 @@ int ParallelExecutor::Prepare(std::vector<mindspore::kernel::LiteKernel *> &kern
for (mindspore::kernel::LiteKernel *kernel : kernels) {
refCount[kernel] = kernel->out_kernels().size();
}
return RET_OK;
}

void ParallelExecutor::PrepareReadyKernels(const std::vector<mindspore::kernel::LiteKernel *> &kernels) {
@@ -235,17 +235,17 @@ bool ThreadPool::SetThreadPool() {
} else if (localMaxThreadNums > kDefaultMaxThreadNums) {
localMaxThreadNums = kDefaultMaxThreadNums;
}
-if (configThreadNums > kDefaultMaxThreadNums) {
+if (configThreadNums > static_cast<int>(kDefaultMaxThreadNums)) {
configThreadNums = kDefaultMaxThreadNums;
}
int addNum = 0;
-if (configThreadNums > kDefaultMaxThreadNums) {
+if (configThreadNums > static_cast<int>(kDefaultMaxThreadNums)) {
addNum = configThreadNums - curThreadRunNums;
-} else if (localMaxThreadNums > curThreadNums) {
+} else if (static_cast<int>(localMaxThreadNums) > curThreadNums) {
addNum = localMaxThreadNums - curThreadNums;
}
AddNewThread(addNum);
-if (curThreadRunNums > localMaxThreadNums) {
+if (curThreadRunNums > static_cast<int>(localMaxThreadNums)) {
SubRunThread(localMaxThreadNums);
} else {
AddRunThread(localMaxThreadNums);
@@ -376,7 +376,7 @@ bool ThreadPool::DistributeTask(ThreadPoolTask *task, int numTask) {

void ThreadPool::AddRunThread(int num) {
int activeNums = num - curThreadRunNums;
-if (activeNums <= 0 || activateList.size() < activeNums) {
+if (activeNums <= 0 || static_cast<int>(activateList.size()) < activeNums) {
return;
}
for (int i = curThreadRunNums - 1, j = 0; j < activeNums; ++i, ++j) {
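The ThreadPool edits are all the same -Wsign-compare cleanup: size() and the kDefault* limits appear to be unsigned while the running counters are int, so one side is cast explicitly before comparing. A small sketch of the pattern under that assumption (names are illustrative, not the pool's real members):

  #include <cstddef>
  #include <vector>

  constexpr size_t kDefaultMax = 8;

  int ClampThreads(int requested, const std::vector<int> &active) {
    // Cast the unsigned values to int so both sides of each comparison agree.
    if (requested > static_cast<int>(kDefaultMax)) {
      requested = static_cast<int>(kDefaultMax);
    }
    if (static_cast<int>(active.size()) < requested) {
      requested = static_cast<int>(active.size());
    }
    return requested;
  }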
@@ -6,6 +6,10 @@ include_directories(${TOP_DIR})
include_directories(${TEST_DIR})
include(${CMAKE_CURRENT_SOURCE_DIR}/../../../cmake/dependency_gtest.cmake)

+string(REPLACE " -Werror " " " CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
+string(REPLACE " -Werror " " " CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+STRING(REPLACE " fvisibility=hidden " " -fvisibility=default " CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
+STRING(REPLACE " fvisibility=hidden " " -fvisibility=default " CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
### anf src
set(ANF_SRC
${CMAKE_CURRENT_SOURCE_DIR}/../../core/ir/meta_tensor.cc
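The four REPLACE lines relax the globally injected flags just for the test build: -Werror is stripped and hidden visibility is presumably switched back to default so the gtest-based tests can build against internal symbols. A minimal stand-alone sketch of the flag-stripping idea (hypothetical project, not the lite test CMakeLists):

  cmake_minimum_required(VERSION 3.12)
  project(flag_strip_demo C)
  set(CMAKE_C_FLAGS "-Wall -Werror -O2")
  # Drop -Werror so warnings in test-only code do not fail the build.
  string(REPLACE " -Werror " " " CMAKE_C_FLAGS " ${CMAKE_C_FLAGS} ")
  message(STATUS "C flags for tests: ${CMAKE_C_FLAGS}")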
@@ -28,6 +28,10 @@

namespace mindspore {
namespace lite {
static const char *DELIM_COLON = ":";
static const char *DELIM_COMMA = ",";
static const char *DELIM_SLASH = "/";

int Benchmark::GenerateRandomData(size_t size, void *data) {
MS_ASSERT(data != nullptr);
char *castedData = static_cast<char *>(data);
@@ -85,7 +89,7 @@ int Benchmark::ReadInputFile() {
MS_LOG(ERROR) << "Not supported image input";
return RET_ERROR;
} else {
-for (auto i = 0; i < _flags->input_data_list.size(); i++) {
+for (size_t i = 0; i < _flags->input_data_list.size(); i++) {
auto cur_tensor = msInputs.at(i);
MS_ASSERT(cur_tensor != nullptr);
size_t size;
@@ -35,16 +35,6 @@ OpDefCopyer GetSimpleOpCopyer() {
newCNode->quantType = inCNode->quantType;
newCNode->primitive = std::make_unique<schema::PrimitiveT>();
newCNode->primitive->value.type = inCNode->primitive->value.type;
-// newCNode->quantParam.clear();
-// for (size_t i = 0; i < inCNode->quantParam.size(); i++) {
-// auto &quantParam = inCNode->quantParam.at(i);
-// auto quantParamCopy = CopyQuantParamArrayT(quantParam);
-// if (quantParamCopy == nullptr) {
-// //MS_LOG(ERROR)("CopyQuantParamArray return nullptr, node: %s", inOpDef->name.c_str());
-// return nullptr;
-// }
-// newCNode->quantParam.emplace_back(std::move(quantParamCopy));
-// }
return std::move(newCNode);
};
}
@@ -139,20 +129,18 @@ STATUS IsolateNode(schema::MetaGraphT *graphT, CNodeT *node) {
auto inputTensorIdxes = node->inputIndex;
auto outputTensorIdxes = node->outputIndex;
if (inputTensorIdxes.empty()) {
-// MS_LOG(ERROR)("Node %s should has no inputs", node->name.c_str());
+MS_LOG(ERROR) << "Node " << node->name.c_str() << "should has no inputs";
return RET_ERROR;
}
if (outputTensorIdxes.size() != 1) {
-// MS_LOG(ERROR)("FakeQuantNode %s should has 1 output, in fact: %zu", node->name.c_str(),
-// outputTensorIdxes.size());
+MS_LOG(ERROR) << "FakeQuantNode " << node->name.c_str() \
+<< "should has 1 output, in fact: " << outputTensorIdxes.size();
return RET_ERROR;
}
auto inDataTensorIdx = inputTensorIdxes.front();
auto outDataTensorIdx = outputTensorIdxes.front();

MS_ASSERT(graphT->allTensors.size() > inDataTensorIdx);
const auto &inDataTensor = graphT->allTensors.at(inDataTensorIdx);
MS_ASSERT(inDataTensor != nullptr);
auto &gOutTensorIdx = graphT->outputIndex;
for (auto iter = gOutTensorIdx.begin(); iter != gOutTensorIdx.end(); iter++) {
if (*iter == outDataTensorIdx) {
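From here on, the converter sources swap the old printf-style MS_LOG comments for the stream-style MS_LOG(ERROR) macro, which chains values with << instead of format specifiers. A hedged sketch of the idiom, assuming the usual utils/log_adapter.h and include/errorcode.h declarations are in scope:

  // Illustrative only: a hypothetical helper using the stream-style logging idiom.
  STATUS CheckSingleOutput(const std::string &name, size_t num_outputs) {
    if (num_outputs != 1) {
      MS_LOG(ERROR) << "Node " << name << " should have 1 output, in fact: " << num_outputs;
      return RET_ERROR;
    }
    return RET_OK;
  }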
@@ -186,20 +174,13 @@ STATUS IsolateNode(schema::MetaGraphT *graphT, CNodeT *node) {

STATUS IsolateOneWayNode(schema::MetaGraphT *graph, size_t subGraphIdx, size_t nodeIdx, bool removeTensor) {
MS_ASSERT(graph != nullptr);
-/*
-if (graph->subgraphs.size() <= subGraphIdx) {
-//MS_LOG(ERROR)("subGraphIdx out of range: %zu", subGraphIdx);
-return RET_PARAM_INVALID;
-}
-*/
-// return IsolateOneWayNode(graph->subgraphs.at(subGraphIdx).get(), nodeIdx, removeTensor);
return IsolateOneWayNode(graph, nodeIdx, removeTensor);
}

STATUS IsolateOneWayNode(schema::MetaGraphT *graphT, size_t nodeIdx, bool removeTensor) {
MS_ASSERT(graphT != nullptr);
if (graphT->nodes.size() <= nodeIdx) {
-// MS_LOG(ERROR)("nodeIdx out of range: %zu", nodeIdx);
+MS_LOG(ERROR) << "nodeIdx out of range: " << nodeIdx;
return RET_PARAM_INVALID;
}
@@ -208,11 +189,11 @@ STATUS IsolateOneWayNode(schema::MetaGraphT *graphT, size_t nodeIdx, bool remove
auto outputTensorIdxes = node->outputIndex;
auto preNodeIdxes = GetInputNodeIdx(*graphT, nodeIdx);
if (preNodeIdxes.size() > 1 || outputTensorIdxes.size() > 1) {
-// MS_LOG(ERROR)("Only support node who has no more than one input and one output");
+MS_LOG(ERROR) << "Only support node who has no more than one input and one output";
return RET_ERROR;
}
if (inputTensorIdxes.empty()) {
-// MS_LOG(ERROR)("Error, %zuth node has no input tensor", nodeIdx);
+MS_LOG(ERROR) << "Error, " << nodeIdx << "th node has no input tensor";
return RET_ERROR;
}
auto inDataTensorIdx = inputTensorIdxes.front();
@@ -247,7 +228,7 @@ STATUS IsolateOneWayNode(schema::MetaGraphT *graphT, size_t nodeIdx, bool remove
// remove all node's outputTensors
auto status = RemoveTensor(graphT, outputTensorIdxes);
if (status != RET_OK) {
-// MS_LOG(ERROR)("RemoveOutputTensors of node %s failed", node->name.c_str());
+MS_LOG(ERROR) << "RemoveOutputTensors of node " << node->name.c_str() << "failed";
return RET_ERROR;
}
}
@@ -270,7 +251,7 @@ STATUS IsolateOneWayNode(schema::MetaGraphT *graphT, CNodeT *node, bool removeTe
}
}
if (!isSubNode) {
-// MS_LOG(ERROR)("Node %s is not in graphT %s", node->name.c_str(), graphT->name.c_str());
+MS_LOG(ERROR) << "Node " << node->name.c_str() << "is not in graphT " << graphT->name.c_str();
return RET_PARAM_INVALID;
} else {
return IsolateOneWayNode(graphT, nodeIdx, removeTensor);
@@ -343,7 +324,7 @@ STATUS UpdateNodeIndex(CNodeT *node, uint32_t deleteIdx) {
STATUS AddTensor2Node(schema::MetaGraphT *graphT, uint32_t nodeIdx, std::unique_ptr<TensorT> tensor,
InsertPlace place) {
if (nodeIdx >= graphT->nodes.size()) {
-// MS_LOG(ERROR)("nodeIdx out of range: %du", nodeIdx);
+MS_LOG(ERROR) << "nodeIdx out of range: " << nodeIdx;
return RET_PARAM_INVALID;
}
graphT->allTensors.emplace_back(std::move(tensor));
@@ -360,16 +341,16 @@ STATUS AddTensor2Node(schema::MetaGraphT *graphT, uint32_t nodeIdx, std::unique_
STATUS ReplaceTensorOfNode(schema::MetaGraphT *graphT, uint32_t nodeIdx, uint32_t inTensorIdx,
std::unique_ptr<TensorT> tensor) {
if (nodeIdx >= graphT->nodes.size()) {
-// MS_LOG(ERROR)("nodeIdx out of range: %du", nodeIdx);
+MS_LOG(ERROR) << "nodeIdx out of range: " << nodeIdx;
return RET_PARAM_INVALID;
}
auto node = graphT->nodes.at(nodeIdx).get();
if (inTensorIdx >= graphT->allTensors.size()) {
-// MS_LOG(ERROR)("inTensorIdx out of range: %du", nodeIdx);
+MS_LOG(ERROR) << "inTensorIdx out of range: " << nodeIdx;
return RET_PARAM_INVALID;
}
if (!IsContain(node->inputIndex, inTensorIdx)) {
-// MS_LOG(ERROR)("inTensorIdx(%du) is not a inputIdx of node(%du)", inTensorIdx, nodeIdx);
+MS_LOG(ERROR) << "inTensorIdx(" << inTensorIdx << ") is not a inputIdx of node(" << nodeIdx << ")";
return RET_PARAM_INVALID;
}
graphT->allTensors.at(inTensorIdx).swap(tensor);
@@ -379,7 +360,7 @@ STATUS ReplaceTensorOfNode(schema::MetaGraphT *graphT, uint32_t nodeIdx, uint32_
NodeIter InsertNode(schema::MetaGraphT *graphT, uint32_t existNodeIdx, InsertPlace place, size_t inoutIndex,
std::unique_ptr<CNodeT> toAddNode, STATUS *errorCode, OpDefCopyer opDefCopyer) {
if (existNodeIdx >= graphT->nodes.size()) {
-// MS_LOG(ERROR)("nodeIdx out of range: %du", existNodeIdx);
+MS_LOG(ERROR) << "nodeIdx out of range: " << existNodeIdx;
return graphT->nodes.end();
}
auto nodeIter = graphT->nodes.begin() + existNodeIdx;
@@ -447,17 +428,14 @@ NodeIter InsertNodeBefore(schema::MetaGraphT *graphT, NodeIter existNodeIter, si
existNodeIter++;
} else {
std::vector<std::unique_ptr<CNodeT>> toAddNodes;
-int i = 0;
-for (size_t preNodeIdx : preNodeIdxes) {
-MS_ASSERT(graphT->nodes.size() > preNodeIdx);
-auto &preNode = graphT->nodes.at(preNodeIdx);
-MS_ASSERT(preNode != nullptr);
+for (size_t i = 0; i < preNodeIdxes.size(); i++) {
+MS_ASSERT(graphT->nodes.size() > preNodeIdxes.at(i));
auto &preTensor = graphT->allTensors.at(preTensorIdx);
MS_ASSERT(preTensor != nullptr);
auto toAddTensor = CopyTensorDefT(preTensor);
if (toAddTensor == nullptr) {
*errorCode = RET_NULL_PTR;
-// MS_LOG(ERROR)("Copy TensorT failed");
+MS_LOG(ERROR) << "Copy TensorT failed";
return graphT->nodes.end();
}
if (toAddNodeIn->primitive->value.type == schema::PrimitiveType_QuantDTypeCast) {
@@ -468,7 +446,7 @@ NodeIter InsertNodeBefore(schema::MetaGraphT *graphT, NodeIter existNodeIter, si
size_t toAddTensorIdx = graphT->allTensors.size() - 1;
auto toAddNode = opDefCopyer(toAddNodeIn.get());
if (toAddNode == nullptr) {
-// MS_LOG(ERROR)("copy toAddNodeIn failed");
+MS_LOG(ERROR) << "copy toAddNodeIn failed";
*errorCode = RET_NULL_PTR;
return graphT->nodes.end();
}
@@ -509,7 +487,7 @@ NodeIter InsertNodeAfter(schema::MetaGraphT *graphT, NodeIter existNodeIter, siz
MS_ASSERT(postTensor != nullptr);
auto toAddTensor = CopyTensorDefT(postTensor);
if (toAddTensor == nullptr) {
-// MS_LOG(ERROR)("Copy TensorT failed");
+MS_LOG(ERROR) << "Copy TensorT failed";
*errorCode = RET_NULL_PTR;
return graphT->nodes.end();
}
@@ -521,7 +499,7 @@ NodeIter InsertNodeAfter(schema::MetaGraphT *graphT, NodeIter existNodeIter, siz
size_t toAddTensorIdx = graphT->allTensors.size() - 1;
auto toAddNode = opDefCopyer(toAddNodeIn.get());
if (toAddNode == nullptr) {
-// MS_LOG(ERROR)("copy toAddNodeIn failed");
+MS_LOG(ERROR) << "copy toAddNodeIn failed";
*errorCode = RET_NULL_PTR;
return graphT->nodes.end();
}
@@ -548,7 +526,7 @@ NodeIter InsertNodeAfter(schema::MetaGraphT *graphT, NodeIter existNodeIter, siz
MS_ASSERT(postTensor != nullptr);
auto toAddTensor = CopyTensorDefT(postTensor);
if (toAddTensor == nullptr) {
-// MS_LOG(ERROR)("Copy TensorT failed");
+MS_LOG(ERROR) << "Copy TensorT failed";
*errorCode = RET_NULL_PTR;
return graphT->nodes.end();
}
@@ -560,7 +538,7 @@ NodeIter InsertNodeAfter(schema::MetaGraphT *graphT, NodeIter existNodeIter, siz
size_t toAddTensorIdx = graphT->allTensors.size() - 1;
auto toAddNode = opDefCopyer(toAddNodeIn.get());
if (toAddNode == nullptr) {
-// MS_LOG(ERROR)("copy toAddNodeIn failed");
+MS_LOG(ERROR) << "copy toAddNodeIn failed";
*errorCode = RET_NULL_PTR;
return graphT->nodes.end();
}
@@ -612,12 +590,12 @@ std::string GetModelName(const std::string &modelFile) {

OpGraphT *OpGraphT::Build(const schema::MetaGraphT *subGraphDef) {
if (subGraphDef == nullptr) {
-// MS_LOG(ERROR)("subGraphDef is nullptr");
+MS_LOG(ERROR) << "subGraphDef is nullptr";
return nullptr;
}
auto graph = std::unique_ptr<OpGraphT>(new OpGraphT());
if (graph == nullptr) {
-// MS_LOG(ERROR)("malloc opgraph failed");
+MS_LOG(ERROR) << "malloc opgraph failed";
return nullptr;
}
@@ -626,7 +604,7 @@ OpGraphT *OpGraphT::Build(const schema::MetaGraphT *subGraphDef) {
for (auto &opDef : opDefs) {
auto ret = graph->AddEdge(opDef.get(), &opDefs);
if (ret != RET_OK) {
-// MS_LOG(ERROR)("%s add edge failed. ret:%d", opDef->name.c_str(), ret);
+MS_LOG(ERROR) << opDef->name.c_str() << " add edge failed. ret: " << ret;
return nullptr;
}
}
@@ -644,7 +622,7 @@ int OpGraphT::AddEdge(const schema::CNodeT *srcNodeDef, const std::vector<std::u
for (auto &dstNodeDef : *nodeDefs) {
bool find = false;
auto inputIndex = dstNodeDef->inputIndex;
-if (std::any_of(inputIndex.begin(), inputIndex.end(), [&index](int i) { return i == index; })) {
+if (std::any_of(inputIndex.begin(), inputIndex.end(), [&index](size_t i) { return i == index; })) {
find = true;
}
@@ -664,13 +642,13 @@ int OpGraphT::AddEdge(NODE_ID srcId, NODE_ID dstId) {
int OpGraphT::AddEdge(NODE_ID srcId, NODE_ID dstId) {
auto srcNode = AddNode(srcId);
if (srcNode == nullptr) {
-// MS_LOG(ERROR)("add srcNode failed");
+MS_LOG(ERROR) << "add srcNode failed";
return RET_ERROR;
}
srcNode->AddOutEdge(dstId);
auto dstNode = AddNode(dstId);
if (dstNode == nullptr) {
-// MS_LOG(ERROR)("add dstNode failed");
+MS_LOG(ERROR) << "add dstNode failed";
return RET_ERROR;
}
dstNode->AddInEdge(srcId);
@@ -109,12 +109,79 @@ STATUS NodeUtils::ConvertDims(mindspore::lite::Format src_format, const std::vec
}
break;
default:
-// MS_LOG(ERROR)("Not support dst format: %d", dst_format);
+MS_LOG(ERROR) << "Not support dst format: " << dst_format;
return RET_ERROR;
}
return RET_OK;
}

+STATUS GetFilterDim(const std::vector<int32_t> &oriDims, kTransFilterType type, int32_t* filterK, int32_t* filterC,
+int32_t* filterH, int32_t* filterW) {
+MS_ASSERT(oriDims.size() == 4);
+if (type == kKCHW2HWCK || type == kKCHW2HWKC || type == kKCHW2KHWC || type == kKCHW2CKHW) {
+*filterK = oriDims.at(KCHW_K);
+*filterC = oriDims.at(KCHW_C);
+*filterH = oriDims.at(KCHW_H);
+*filterW = oriDims.at(KCHW_W);
+} else if (type == kCKHW2HWCK || type == kCKHW2HWKC || type == kCKHW2KHWC) {
+*filterC = oriDims.at(CKHW_C);
+*filterK = oriDims.at(CKHW_K);
+*filterH = oriDims.at(CKHW_H);
+*filterW = oriDims.at(CKHW_W);
+} else if (type == kHWCK2KCHW || type == kHWCK2CKHW) {
+*filterH = oriDims.at(HWCK_H);
+*filterW = oriDims.at(HWCK_W);
+*filterC = oriDims.at(HWCK_C);
+*filterK = oriDims.at(HWCK_K);
+} else if (type == kHWKC2KCHW || type == kHWKC2CKHW) {
+*filterH = oriDims.at(HWKC_H);
+*filterW = oriDims.at(HWKC_W);
+*filterK = oriDims.at(HWKC_K);
+*filterC = oriDims.at(HWKC_C);
+} else if (type == kNHWC2KCHW || type == kNHWC2HWCK || type == kNHWC2CKHW) {
+*filterK = oriDims.at(NHWC_N);
+*filterH = oriDims.at(NHWC_H);
+*filterW = oriDims.at(NHWC_W);
+*filterC = oriDims.at(NHWC_C);
+} else if (type == kCHWK2HWCK || type == kCHWK2KHWC) {
+*filterC = oriDims.at(CHWK_C);
+*filterH = oriDims.at(CHWK_H);
+*filterW = oriDims.at(CHWK_W);
+*filterK = oriDims.at(CHWK_K);
+} else if (type == kKHWC2HWCK || type == kKHWC2CHWK) {
+*filterK = oriDims.at(KHWC_K);
+*filterH = oriDims.at(KHWC_H);
+*filterW = oriDims.at(KHWC_W);
+*filterC = oriDims.at(KHWC_C);
+} else {
+MS_LOG(ERROR) << "Unsupported transFilterType: " << type;
+return RET_ERROR;
+}
+return RET_OK;
+}
+
+STATUS SetFilterDim(schema::TensorT *tensor, kTransFilterType type, int32_t filterK, int32_t filterC,
+int32_t filterH, int32_t filterW) {
+MS_ASSERT(tensor != nullptr);
+if (type == kKCHW2HWCK || type == kCKHW2HWCK || type == kNHWC2HWCK || type == kKHWC2HWCK || type == kCHWK2HWCK) {
+tensor->dims = {filterH, filterW, filterC, filterK};
+} else if (type == kKCHW2HWKC || type == kCKHW2HWKC) {
+tensor->dims = {filterH, filterW, filterK, filterC};
+} else if (type == kHWCK2KCHW || type == kHWKC2KCHW || type == kNHWC2KCHW) {
+tensor->dims = {filterK, filterC, filterH, filterW};
+} else if (type == kHWCK2CKHW || type == kHWKC2CKHW || type == kNHWC2CKHW || type == kKCHW2CKHW) {
+tensor->dims = {filterC, filterK, filterH, filterW};
+} else if (type == kKHWC2CHWK) {
+tensor->dims = {filterC, filterH, filterW, filterK};
+} else if (type == kKCHW2KHWC || type == kCKHW2KHWC || type == kCHWK2KHWC) {
+tensor->dims = {filterK, filterH, filterW, filterC};
+} else {
+MS_LOG(ERROR) << "Unsupported transFilterType: " << type;
+return RET_ERROR;
+}
+return RET_OK;
+}
+
STATUS TransFilterFormat(schema::TensorT *tensor, schema::Format dstFormat) {
if (tensor == nullptr) {
return RET_NULL_PTR;
@@ -75,72 +75,10 @@ enum kTransFilterType {
kKCHW2CKHW // 20
};

-static STATUS GetFilterDim(std::vector<int32_t> &oriDims, kTransFilterType type, int32_t &filterK, int32_t &filterC,
-int32_t &filterH, int32_t &filterW) {
-MS_ASSERT(oriDims.size() == 4);
-if (type == kKCHW2HWCK || type == kKCHW2HWKC || type == kKCHW2KHWC || type == kKCHW2CKHW) {
-filterK = oriDims.at(KCHW_K);
-filterC = oriDims.at(KCHW_C);
-filterH = oriDims.at(KCHW_H);
-filterW = oriDims.at(KCHW_W);
-} else if (type == kCKHW2HWCK || type == kCKHW2HWKC || type == kCKHW2KHWC) {
-filterC = oriDims.at(CKHW_C);
-filterK = oriDims.at(CKHW_K);
-filterH = oriDims.at(CKHW_H);
-filterW = oriDims.at(CKHW_W);
-} else if (type == kHWCK2KCHW || type == kHWCK2CKHW) {
-filterH = oriDims.at(HWCK_H);
-filterW = oriDims.at(HWCK_W);
-filterC = oriDims.at(HWCK_C);
-filterK = oriDims.at(HWCK_K);
-} else if (type == kHWKC2KCHW || type == kHWKC2CKHW) {
-filterH = oriDims.at(HWKC_H);
-filterW = oriDims.at(HWKC_W);
-filterK = oriDims.at(HWKC_K);
-filterC = oriDims.at(HWKC_C);
-} else if (type == kNHWC2KCHW || type == kNHWC2HWCK || type == kNHWC2CKHW) {
-filterK = oriDims.at(NHWC_N);
-filterH = oriDims.at(NHWC_H);
-filterW = oriDims.at(NHWC_W);
-filterC = oriDims.at(NHWC_C);
-} else if (type == kCHWK2HWCK || type == kCHWK2KHWC) {
-filterC = oriDims.at(CHWK_C);
-filterH = oriDims.at(CHWK_H);
-filterW = oriDims.at(CHWK_W);
-filterK = oriDims.at(CHWK_K);
-} else if (type == kKHWC2HWCK || type == kKHWC2CHWK) {
-filterK = oriDims.at(KHWC_K);
-filterH = oriDims.at(KHWC_H);
-filterW = oriDims.at(KHWC_W);
-filterC = oriDims.at(KHWC_C);
-} else {
-MS_LOG(ERROR) << "Unsupported transFilterType: " << type;
-return RET_ERROR;
-}
-return RET_OK;
-}
-
-static STATUS SetFilterDim(schema::TensorT *tensor, kTransFilterType type, int32_t filterK, int32_t filterC,
-int32_t filterH, int32_t filterW) {
-MS_ASSERT(tensor != nullptr);
-if (type == kKCHW2HWCK || type == kCKHW2HWCK || type == kNHWC2HWCK || type == kKHWC2HWCK || type == kCHWK2HWCK) {
-tensor->dims = {filterH, filterW, filterC, filterK};
-} else if (type == kKCHW2HWKC || type == kCKHW2HWKC) {
-tensor->dims = {filterH, filterW, filterK, filterC};
-} else if (type == kHWCK2KCHW || type == kHWKC2KCHW || type == kNHWC2KCHW) {
-tensor->dims = {filterK, filterC, filterH, filterW};
-} else if (type == kHWCK2CKHW || type == kHWKC2CKHW || type == kNHWC2CKHW || type == kKCHW2CKHW) {
-tensor->dims = {filterC, filterK, filterH, filterW};
-} else if (type == kKHWC2CHWK) {
-tensor->dims = {filterC, filterH, filterW, filterK};
-} else if (type == kKCHW2KHWC || type == kCKHW2KHWC || type == kCHWK2KHWC) {
-tensor->dims = {filterK, filterH, filterW, filterC};
-} else {
-MS_LOG(ERROR) << "Unsupported transFilterType: " << type;
-return RET_ERROR;
-}
-return RET_OK;
-}
+STATUS GetFilterDim(const std::vector<int32_t> &oriDims, kTransFilterType type, int32_t* filterK, int32_t* filterC,
+int32_t* filterH, int32_t* filterW);
+STATUS SetFilterDim(schema::TensorT *tensor, kTransFilterType type, int32_t filterK, int32_t filterC,
+int32_t filterH, int32_t filterW);

template <typename T>
static STATUS TransFilterData(schema::TensorT *tensor, kTransFilterType type, int32_t filterK, int32_t filterC,
@@ -356,7 +294,7 @@ static STATUS TransFilterFormat(schema::TensorT *tensor, kTransFilterType type)
int32_t filterW;
int32_t filterC;
int32_t filterK;
-auto status = GetFilterDim(oriDims, type, filterK, filterC, filterH, filterW);
+auto status = GetFilterDim(oriDims, type, &filterK, &filterC, &filterH, &filterW);
if (status != RET_OK) {
MS_LOG(ERROR) << "GetFilterDim failed: " << status;
return status;
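The GetFilterDim/SetFilterDim refactor in these hunks moves the helpers out of the header, where they were static and so presumably tripped unused-function warnings in translation units that include it, into node_util.cc, and turns the reference out-parameters into pointers so call sites such as GetFilterDim(oriDims, type, &filterK, ...) make the writes explicit. A small sketch of that signature style under the same assumptions, with hypothetical names:

  #include <vector>

  // Declared in a header, defined once in a .cc file; outputs passed by pointer.
  int GetDims2D(const std::vector<int> &dims, int *h, int *w) {
    if (dims.size() != 2 || h == nullptr || w == nullptr) {
      return -1;  // error
    }
    *h = dims[0];
    *w = dims[1];
    return 0;  // ok
  }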
@@ -42,107 +42,6 @@ std::unique_ptr<schema::QuantParamT> CopyQuantParamT(const std::unique_ptr<schem
return std::move(dstQuantParam);
}
-
-std::unique_ptr<QuantParamT> CopyQuantParamArrayT(const std::unique_ptr<QuantParamT> &srcQuantParamArray) {
-MS_ASSERT(srcQuantParamArray != nullptr);
-auto dstQuantParamArrayT = std::unique_ptr<QuantParamT>(new (std::nothrow) QuantParamT());
-if (dstQuantParamArrayT == nullptr) {
-// MS_LOG(ERROR)("new dstQuantParamArrayT failed");
-return nullptr;
-}
-/*
-for (size_t i = 0; i < srcQuantParamArray->param.size(); i++) {
-auto &srcQuantParam = srcQuantParamArray->param.at(i);
-MS_ASSERT(srcQuantParam != nullptr);
-std::unique_ptr<QuantParamT> dstQuantParam(new (std::nothrow) QuantParamT());
-if (dstQuantParam == nullptr) {
-//MS_LOG(ERROR)("new dstQuantParam failed");
-dstQuantParamArrayT.release();
-return nullptr;
-}
-dstQuantParam->scale = srcQuantParam->scale;
-dstQuantParam->zeroPoint = srcQuantParam->zeroPoint;
-dstQuantParam->min = srcQuantParam->min;
-dstQuantParam->max = srcQuantParam->max;
-dstQuantParam->narrowRange = srcQuantParam->narrowRange;
-dstQuantParam->numBits = srcQuantParam->numBits;
-dstQuantParamArrayT->param.emplace_back(std::move(dstQuantParam));
-}
-*/
-return std::move(dstQuantParamArrayT);
-}
-
-std::unique_ptr<QuantParamT> GetInTensorQuantParamArray(const MetaGraphT &graphT, size_t tensorIdx) {
-auto preNodeIdxes = GetLinkedPreIdx(graphT, tensorIdx);
-MS_ASSERT(preNodeIdxes.size() <= 1);
-if (preNodeIdxes.empty()) {
-// MS_LOGD("the %zuth tensor has no preNode", tensorIdx);
-return nullptr;
-}
-auto preNodeIdx = preNodeIdxes.front();
-MS_ASSERT(preNodeIdx < graphT.nodes.size());
-auto &preNode = graphT.nodes.at(preNodeIdx);
-MS_ASSERT(preNode != nullptr);
-MS_ASSERT(preNode->inputIndex.size() + preNode->outputIndex.size() == preNode->quantParam.size());
-/*
-for (size_t i = 0; i < preNode->outputIndex.size(); i++) {
-if (preNode->outputIndex.at(i) == tensorIdx) {
-auto &quantPArray = preNode->quantParam.at(preNode->inputIndex.size() + i);
-MS_ASSERT(quantPArray->param.size() == 1); // only support prelayer
-MS_ASSERT(quantPArray->param.front() != nullptr);
-if (quantPArray->param.front()->min == FLT_MAX) {
-//MS_LOGD("the %zuth tensor's preNode's relative quantParam has not be inited", tensorIdx);
-return nullptr;
-} else {
-return std::move(CopyQuantParamArrayT(quantPArray));
-}
-}
-}
-*/
-MS_ASSERT(false);
-return nullptr;
-}
-
-std::unique_ptr<QuantParamT> GetOutTensorQuantParamArray(const MetaGraphT &graphT, size_t tensorIdx) {
-auto postNodeIdxes = GetLinkedPostIdx(graphT, tensorIdx);
-if (postNodeIdxes.empty()) {
-// MS_LOGD("the %zuth tensor has no postNode", tensorIdx);
-return nullptr;
-}
-// find one postNode which can give valid quantParamArray
-for (auto postNodeIdx : postNodeIdxes) {
-MS_ASSERT(postNodeIdx < graphT.nodes.size());
-auto &postNode = graphT.nodes.at(postNodeIdx);
-MS_ASSERT(postNode != nullptr);
-MS_ASSERT(postNode->inputIndex.size() + postNode->outputIndex.size() == postNode->quantParam.size());
-/*
-for (size_t i = 0; i < postNode->inputIndex.size(); i++) {
-if (postNode->inputIndex.at(i) == tensorIdx) {
-auto &quantPArray = postNode->quantParam.at(i);
-MS_ASSERT(quantPArray->param.size() == 1); // only support prelayer
-MS_ASSERT(quantPArray->param.front() != nullptr);
-// check if postNode has valid quantParam
-if (quantPArray->param.front()->min == FLT_MAX) {
-continue;
-}
-MS_ASSERT(graphT.allTensors.size() > postNode->inputIndex.at(i));
-auto &tensor = graphT.allTensors.at(postNode->inputIndex.at(i));
-MS_ASSERT(tensor != nullptr);
-if (tensor->refCount == schema::NodeType_ValueNode) {
-continue;
-}
-// find valid quantParam return
-auto paramArray = CopyQuantParamArrayT(quantPArray);
-if (paramArray == nullptr) {
-//MS_LOG(ERROR)("CopyQuantParamArrayT return nullptr");
-return nullptr;
-}
-return std::move(paramArray);
-}
-}*/
-}
-return nullptr;
-}

size_t GetElementSize(const TensorT &tensor) { return GetElementSize(TypeId(tensor.dataType)); }

size_t GetElementSize(const TypeId &dataType) {
@@ -58,10 +58,6 @@ std::unique_ptr<schema::QuantParamT> CopyQuantParamT(const std::unique_ptr<schem
-std::unique_ptr<schema::QuantParamT> CopyQuantParamArrayT(
-const std::unique_ptr<schema::QuantParamT> &srcQuantParamArray);

-std::unique_ptr<schema::QuantParamT> GetInTensorQuantParamArray(const schema::MetaGraphT &graphT, size_t tensorIdx);

-std::unique_ptr<schema::QuantParamT> GetOutTensorQuantParamArray(const schema::MetaGraphT &graphT, size_t tensorIdx);

using MSGraphDefTPtr = std::shared_ptr<schema::MetaGraphT>;

enum TensorType { CONST = 0, GRAPH_INPUT = 1, OP_OUTPUT = 2, TF_CONST = 3 };
@@ -37,6 +37,7 @@
namespace mindspore {
namespace lite {
using FmkType = converter::FmkType;
static const char *DELIM_SLASH = "/";
Converter::Converter() {
this->transform = new GraphDefTransform;
this->anfTransform = new AnfTransform;
@@ -333,7 +333,7 @@ STATUS BatchNormFoldFusionPass::GenNewWeightTensor() {
void *miData = muTensor->data.data();
auto *castedMiData = static_cast<float *>(miData);
size_t stride = weightShapeSize / channelOut;
-for (size_t i = 0; i < channelOut; i++) {
+for (int i = 0; i < channelOut; i++) {
for (size_t j = 0; j < stride; j++) {
castedNewWeightData[i * stride + j] = castedOldWeightData[i * stride + j] * castedGammaData[i] / castedMiData[i];
}
@@ -367,7 +367,7 @@ STATUS BatchNormFoldFusionPass::GenNewBiasTensor() { // bias has no quant
MS_ASSERT(sigmaTensor->dataType == DataType_DT_FLOAT);
void *sigmaData = sigmaTensor->data.data();
auto *castedSigmaData = static_cast<float *>(sigmaData);
-for (size_t i = 0; i < channelOut; i++) {
+for (int i = 0; i < channelOut; i++) {
castedNewBiasData[i] = castedBetaData[i] - castedGammaData[i] * castedMiData[i] / castedSigmaData[i];
}
return RET_OK;
@@ -19,8 +19,6 @@
#include <memory>
#include "tools/converter/legacy_optimizer/fusion/format_trans_fusion_pass.h"
#include "utils/log_adapter.h"
#include "securec/include/securec.h"
// #include "utils/log_adapter.h"
#include "tools/common/graph_util.h"
#include "include/errorcode.h"
#include "mindspore/lite/schema/inner/model_generated.h"
@@ -44,7 +42,7 @@ STATUS FormatTransFusionPass::DefinePattern() {
std::unique_ptr<FusionPattern> nc2NhAndNh2NcFusionPattern(new (std::nothrow)
FusionPattern(kNc2NhAndNh2NcFusionPattern));
if (nc2NhAndNh2NcFusionPattern == nullptr) {
-// MS_LOG(ERROR) << "new %s failed", kNc2NhAndNh2NcFusionPattern);
+MS_LOG(ERROR) << "new " << kNc2NhAndNh2NcFusionPattern << "failed";
return RET_ERROR;
}
nc2NhAndNh2NcFusionPattern->AddPatternOp(nc2nhOp);
@@ -52,7 +50,6 @@ STATUS FormatTransFusionPass::DefinePattern() {
nc2NhAndNh2NcFusionPattern->Finish();
this->patterns.emplace_back(nc2NhAndNh2NcFusionPattern.release());
}
// nchw2nhwc + QuantDtypeCast + nhwc2nchw
{
auto nc2nhOp = std::make_shared<PatternOp>();
nc2nhOp->id = kFormatTransNc2NhOp;
@@ -68,7 +65,7 @@ STATUS FormatTransFusionPass::DefinePattern() {
nh2ncOp->left = passOp;
std::unique_ptr<FusionPattern> nc2NhAndNh2NcPassFusionPattern(new FusionPattern(kNc2NhAndNh2NcPassFusionPattern));
if (nc2NhAndNh2NcPassFusionPattern == nullptr) {
-// MS_LOG(ERROR) << "new %s failed", kNc2NhAndNh2NcPassFusionPattern);
+MS_LOG(ERROR) << "new " << kNc2NhAndNh2NcPassFusionPattern << "failed";
return RET_ERROR;
}
nc2NhAndNh2NcPassFusionPattern->AddPatternOp(nc2nhOp);
@@ -90,7 +87,7 @@ STATUS FormatTransFusionPass::DefinePattern() {
std::unique_ptr<FusionPattern> nh2NcAndNc2NhFusionPattern(new (std::nothrow)
FusionPattern(kNh2NcAndNc2NhFusionPattern));
if (nh2NcAndNc2NhFusionPattern == nullptr) {
-// MS_LOG(ERROR) << "new %s failed", kNh2NcAndNc2NhFusionPattern);
+MS_LOG(ERROR) << "new " << kNh2NcAndNc2NhFusionPattern << "failed";
return RET_ERROR;
}
nh2NcAndNc2NhFusionPattern->AddPatternOp(nh2ncOp);
@@ -247,7 +247,7 @@ bool FusionPass::MatchTree(schema::MetaGraphT *graph, size_t nodeIdx, const std:
// path is setted and not pointer to this node
if (target->pathSetted) {
MS_ASSERT(target->path != nullptr);
-if (target->path->nodeIdx != nodeIdx) {
+if (target->path->nodeIdx != static_cast<int>(nodeIdx)) {
return false;
}
}
@@ -108,7 +108,6 @@ STATUS MatMulBiasAddFusionPass::DoFusion(MetaGraphT *graph, const std::string &p
transA = matMulNode->primitive->value.AsMatMul()->transposeA;
transB = matMulNode->primitive->value.AsMatMul()->transposeB;
MS_ASSERT(matMulNode->primitive->value.value != nullptr);
delete (matMulNode->primitive->value.value);
matMulNode->primitive->value.type = schema::PrimitiveType_FullConnection;
matMulNode->primitive->value.value = fcAttr.release();
@@ -135,11 +134,6 @@ STATUS MatMulBiasAddFusionPass::DoFusion(MetaGraphT *graph, const std::string &p
STATUS MatMulBiasAddFusionPass::InsertTransposeNode(MetaGraphT *graph, const std::shared_ptr<Path> &matMulPath) {
MS_ASSERT(graph != nullptr);
MS_ASSERT(matMulPath != nullptr);
auto &matMulNode = graph->nodes.at(matMulPath->nodeIdx);
-MS_ASSERT(graph->allTensors.size() > matMulNode->inputIndex.at(0));
-MS_ASSERT(graph->allTensors.size() > matMulNode->inputIndex.at(2));
-const auto &tensorA = graph->allTensors.at(matMulNode->inputIndex.at(0));
-const auto &tensorB = graph->allTensors.at(matMulNode->inputIndex.at(1));
-
std::vector<size_t> insertNodeIdxList;
if (transA) {
Some files were not shown because too many files have changed in this diff.