pack optimize

wangpingan 2023-02-17 17:17:48 +08:00 committed by wangpingan2
parent 985c48e543
commit ac7b243a5f
17 changed files with 686 additions and 3 deletions

View File

@@ -142,6 +142,7 @@ set(LITE_SRC
${CMAKE_CURRENT_SOURCE_DIR}/litert/sub_graph_kernel.cc
${CMAKE_CURRENT_SOURCE_DIR}/litert/scheduler.cc
${CMAKE_CURRENT_SOURCE_DIR}/litert/lite_session.cc
${CMAKE_CURRENT_SOURCE_DIR}/litert/runtime_packed_node_pass.cc
${CMAKE_CURRENT_SOURCE_DIR}/litert/model_manager.cc
${CMAKE_CURRENT_SOURCE_DIR}/errorcode.cc
${CMAKE_CURRENT_SOURCE_DIR}/litert/cpu_info.cc

View File

@@ -125,7 +125,7 @@ void MatmulDynamicBaseInt8CPUKernel::FreeTmpBuffer() {
free(pack_a_ptr_);
pack_a_ptr_ = nullptr;
}
if (pack_b_ptr_ != nullptr) {
if (pack_b_ptr_ != nullptr && !weight_is_packed_) {
free(pack_b_ptr_);
pack_b_ptr_ = nullptr;
}
@@ -133,7 +133,7 @@ void MatmulDynamicBaseInt8CPUKernel::FreeTmpBuffer() {
free(input_sums_);
input_sums_ = nullptr;
}
if (weight_sums_ != nullptr) {
if (weight_sums_ != nullptr && !weight_is_packed_) {
free(weight_sums_);
weight_sums_ = nullptr;
}
@@ -162,6 +162,12 @@ int MatmulDynamicBaseInt8CPUKernel::InitInputQuantParam() {
}
int MatmulDynamicBaseInt8CPUKernel::TransferB() {
if (weight_is_packed_) {
CHECK_NULL_RETURN(weight_sums_tensor_);
pack_b_ptr_ = static_cast<int8_t *>(in_tensors_.at(kWeightIndex)->data());
weight_sums_ = static_cast<int *>(weight_sums_tensor_->data());
return RET_OK;
}
auto weight_data = reinterpret_cast<int8_t *>(in_tensors_.at(kWeightIndex)->data());
CHECK_NULL_RETURN(weight_data);
for (int i = 0; i < b_batch_; i++) {
@@ -177,6 +183,7 @@ int MatmulDynamicBaseInt8CPUKernel::TransferB() {
CalcWeightSums(current_weight, param_->deep_, param_->col_, current_sums, RowMajor);
}
}
return RET_OK;
}
@@ -205,6 +212,10 @@ int MatmulDynamicBaseInt8CPUKernel::InitMatrixABuffer() {
}
int MatmulDynamicBaseInt8CPUKernel::InitMatrixBBuffer() {
if (weight_is_packed_) {
return RET_OK;
}
if (pack_b_ptr_ != nullptr) {
free(pack_b_ptr_);
pack_b_ptr_ = nullptr;

View File

@@ -42,6 +42,12 @@ class MatmulDynamicBaseInt8CPUKernel : public LiteKernel {
static int InitBroadcastParams(const std::vector<int> &a_shape_const, const std::vector<int> &b_shape_const,
MatMulParameter *params, std::vector<int> *a_offsets, std::vector<int> *b_offsets);
const int8_t *GetPackBPtr() const { return pack_b_ptr_; }
const int *GetWeightSums() const { return weight_sums_; }
int GetBBatch() const { return b_batch_; }
void SetWeightIsPacked(bool weight_is_packed) { this->weight_is_packed_ = weight_is_packed; }
void SetWeightSumsTensor(lite::Tensor *weight_sums_tensor) { this->weight_sums_tensor_ = weight_sums_tensor; }
private:
void ResizeMatrixBParameter();
int CopyBias();
@@ -90,6 +96,8 @@ class MatmulDynamicBaseInt8CPUKernel : public LiteKernel {
int thread_stride_ = 0;
bool enable_fp16_ = false;
PackFunc b_pack_func_ = nullptr;
bool weight_is_packed_ = false;
lite::Tensor *weight_sums_tensor_ = nullptr;
};
} // namespace mindspore::kernel
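
A note on the hand-off these setters enable: the runtime pass (below) calls them before Prepare(). A minimal sketch of that call order, with matmul_kernel and sums_tensor as placeholder names, not part of the diff:

// Hand an offline-packed weight to the kernel before Prepare():
matmul_kernel->SetWeightIsPacked(true);           // TransferB() will alias, not repack
matmul_kernel->SetWeightSumsTensor(sums_tensor);  // int32 sums computed at convert time
// TransferB() then points pack_b_ptr_ at the weight tensor's data and
// weight_sums_ at sums_tensor's data; the new !weight_is_packed_ guards in
// FreeTmpBuffer() keep the kernel from freeing memory it does not own.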

View File

@@ -62,6 +62,7 @@
#include "kernel/ascend/plugin/ascend_kernel_plugin.h"
#endif
#include "thread/parallel_thread_pool_manager.h"
#include "src/litert/runtime_packed_node_pass.h"
using AbstractBaseModel = mindspore::infer::AbstractBaseModel;
@@ -585,6 +586,8 @@ int LiteSession::CompileGraph(Model *model) {
InitGraphInputTensors(model);
InitGraphOutputTensors(model);
PackedNodePass::GetInstance().Run(model, tensors_);
// scheduler kernels
Scheduler scheduler(context_.get(), ms_context_, model, &tensors_, &inputs_, &outputs_, is_train_session_,
&is_infershape_, &is_control_flow_, &infer_along_running_, execution_plan_, delegate_,
@@ -698,6 +701,11 @@ int LiteSession::PrepareKernels(const Model *model) {
return RET_ERROR;
}
for (auto &node : subgraph_kernel->nodes()) {
ret = PackKernelExec(node, tensors_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Pack KernelExec failed.";
return ret;
}
ret = node->Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "node: " << node->name() << " prepare failed.";

View File

@@ -0,0 +1,261 @@
/**
* Copyright 2023 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/litert/runtime_packed_node_pass.h"
#include "nnacl/op_base.h"
#include "src/litert/kernel/cpu/int8/matmul_dynamic_base_int8.h"
using RecoveryWeightFunc = void (*)(void *, void *, int, int, bool);
namespace mindspore {
namespace {
constexpr size_t kFlatbuffersBuilderInitSize = 1024;
constexpr auto kActivationType = "activation_type";
constexpr auto kTransposeA = "transpose_a";
constexpr auto kTransposeB = "transpose_b";
constexpr auto kArm64SimdDot = "ARM64SIMD_DOT";
} // namespace
namespace lite {
PackedNodePass::~PackedNodePass() {
for (auto &pack_info : node_pack_info_map_) {
delete pack_info.second;
}
node_pack_info_map_.clear();
}
void PackedNodePass::Run(Model *model, const std::vector<Tensor *> &tensors) {
for (auto &node : model->graph_.all_nodes_) {
MS_ASSERT(node != nullptr);
if (node->node_type_ != schema::PrimitiveType_Custom) {
continue;
}
auto *primitive = reinterpret_cast<const schema::Primitive *>(node->primitive_);
if (primitive == nullptr) {
MS_LOG(ERROR) << "Op " << node->name_ << " should exist in model!";
return;
}
auto custom = primitive->value_as_Custom();
if (custom == nullptr || custom->type() == nullptr) {
MS_LOG(ERROR) << "Custom node is nullptr";
return;
}
auto custom_type = custom->type()->str();
if (custom_type != "MatmulFusionPacked") {
continue;
}
flatbuffers::FlatBufferBuilder fbb(kFlatbuffersBuilderInitSize);
auto custom_attr = custom->attr();
std::map<std::string, std::string> attr_map;
for (size_t i = 0; i < custom_attr->size(); ++i) {
auto attr = custom_attr->Get(i);
auto attr_key = attr->name()->str();
auto data_bytes = attr->data();
int data_size = static_cast<int>(data_bytes->size());
std::string attr_value;
for (int j = 0; j < data_size; j++) {
attr_value.push_back(static_cast<char>(data_bytes->Get(j)));
}
attr_map[attr_key] = attr_value;
}
if (attr_map.find(kActivationType) == attr_map.end() || attr_map.find(kTransposeA) == attr_map.end() ||
attr_map.find(kTransposeB) == attr_map.end()) {
MS_LOG(ERROR) << "Custom attr error.";
return;
}
auto val_offset = schema::CreateMatMulFusion(
fbb, std::atoi(attr_map[kTransposeA].c_str()), std::atoi(attr_map[kTransposeB].c_str()),
static_cast<schema::ActivationType>(std::atoi(attr_map[kActivationType].c_str())));
auto prim_offset = schema::CreatePrimitive(fbb, schema::PrimitiveType_MatMulFusion, val_offset.o);
fbb.Finish(prim_offset);
void *prim = malloc(fbb.GetSize());
if (prim == nullptr) {
MS_LOG(ERROR) << "malloc primitive failed.";
return;
}
memcpy(prim, fbb.GetBufferPointer(), fbb.GetSize());
auto custom_primitive = flatbuffers::GetRoot<schema::Primitive>(prim);
fbb.Clear();
PackInfo *pack_info = new (std::nothrow) PackInfo();
if (pack_info == nullptr) {
free(prim);
MS_LOG(ERROR) << "new PackInfo failed.";
return;
}
node->primitive_ = custom_primitive;
pack_info->is_packed_ = true;
pack_info->weight_sums_index_ = node->input_indices_.back();
pack_info->b_batch_ = std::atoi(attr_map["b_batch"].c_str());
pack_info->col_ = std::atoi(attr_map["col"].c_str());
pack_info->deep_ = std::atoi(attr_map["deep"].c_str());
pack_info->col_align_ = std::atoi(attr_map["col_align"].c_str());
pack_info->deep_align_ = std::atoi(attr_map["deep_align"].c_str());
pack_info->b_transpose_ = std::atoi(attr_map[kTransposeB].c_str());
pack_info->cpu_option_ = attr_map["cpu_option"];
AddNodePackInfo(node->name_, pack_info);
node->input_indices_.pop_back();
node->node_type_ = schema::PrimitiveType_MatMulFusion;
}
if (!(reinterpret_cast<lite::LiteModel *>(model)->keep_model_buf())) {
CopyWeightBiasSumsTensor(tensors);
}
}
void PackedNodePass::CopyWeightBiasSumsTensor(const std::vector<Tensor *> &tensors) {
for (auto &pack_info : node_pack_info_map_) {
auto index = static_cast<size_t>(pack_info.second->weight_sums_index_);
if (index >= tensors.size()) {
return;
}
auto tensor = tensors[index];
if (!tensor->IsConst() && tensor->data() != nullptr) {
return;
}
if (!tensor->IsConst() || tensor->own_data()) {
continue;
}
if (tensor->data_type() == kObjectTypeTensorType) {
MS_ASSERT(tensor->data() == nullptr);
} else {
auto copy_tensor = Tensor::CopyTensor(*tensor, true);
if (copy_tensor == nullptr) {
MS_LOG(ERROR) << "Copy tensor failed";
return;
}
tensor->FreeData();
tensor->set_data(copy_tensor->data());
tensor->set_own_data(true);
copy_tensor->set_data(nullptr);
delete copy_tensor;
}
}
}
void MatmulDynamicSdotInt8Cpu(void *src, void *dst, int row, int col, bool transpose) {
auto src_int8 = static_cast<int8_t *>(src);
auto dst_int8 = static_cast<int8_t *>(dst);
if (!transpose) {
// RowMajor2Col4x16MajorInt8
int row_4 = UP_ROUND(row, C4NUM);
int stride = C16NUM * C4NUM;
for (int r = 0; r < row_4; ++r) {
for (int c = 0; c < col; ++c) {
int stride_idx = c / C16NUM * (row_4 / C4NUM) + r / C4NUM;
if (r < row) {
int src_idx = r * col + c;
src_int8[src_idx] = dst_int8[stride * stride_idx + c % C16NUM * C4NUM + r % C4NUM];
}
}
}
} else {
int temp = row;
row = col;
col = temp;
// RowMajor2Row4x16MajorInt8
int col4 = UP_ROUND(col, C4NUM);
for (int r = 0; r < row; r++) {
int rd16 = r / C16NUM;
int rm16 = r % C16NUM;
for (int c = 0; c < col; c++) {
int cd4 = c / C4NUM;
int cm4 = c % C4NUM;
int dst_index = rd16 * col4 * C16NUM + cd4 * C16NUM * C4NUM + rm16 * C4NUM + cm4;
int src_index = r * col + c;
src_int8[src_index] = dst_int8[dst_index];
}
}
}
}
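// Worked example of the index math above (illustrative comment, not part of
// the commit): with C4NUM = 4 and C16NUM = 16, the non-transposed layout
// stores 4x16 tiles of C16NUM * C4NUM = 64 bytes. For row = 6, col = 20 and
// element (r, c) = (5, 17):
//   row_4      = UP_ROUND(6, 4) = 8, stride = 64
//   stride_idx = (17 / 16) * (8 / 4) + 5 / 4 = 3
//   dst offset = 64 * 3 + (17 % 16) * 4 + (5 % 4) = 197
// so the loop restores src_int8[5 * 20 + 17] from dst_int8[197].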
RecoveryWeightFunc GetRecoveryWeightFunc(const int quant_type, const TypeId data_type, const int node_type,
const std::string &cpu_option) {
if (cpu_option == kArm64SimdDot && node_type == schema::PrimitiveType_MatMulFusion &&
quant_type == schema::QuantType_QUANT_DYNAMIC && data_type == kNumberTypeInt8) {
return MatmulDynamicSdotInt8Cpu;
}
return nullptr;
}
int PackedMatmulKernelExec(kernel::KernelExec *kernel_exec, const std::vector<Tensor *> &tensors) {
auto pack_info = PackedNodePass::GetInstance().GetNodePackInfo(kernel_exec->name());
if (pack_info == nullptr) {
return RET_OK;
}
MS_CHECK_TRUE_MSG(kernel_exec->in_tensors().size() >= kInputSize1, lite::RET_ERROR,
"kernel doesn't have weight tensor.");
auto dst_tensor = kernel_exec->in_tensors()[SECOND_INPUT];
auto kernel = kernel_exec->kernel();
MS_CHECK_TRUE_MSG(kernel != nullptr, lite::RET_NULL_PTR, "kernel is nullptr.");
auto param = reinterpret_cast<MatMulParameter *>(kernel_exec->op_parameter());
if (dst_tensor->data_type() != kNumberTypeInt8 || kernel->quant_type() != schema::QuantType_QUANT_DYNAMIC) {
return RecoveryPackedWeight(dst_tensor, static_cast<int>(kernel->quant_type()), dst_tensor->data_type(),
schema::PrimitiveType_MatMulFusion, pack_info);
}
if (param->matmul_type_ != kMatmulDynamicSdotInt8Cpu && pack_info->cpu_option_ == kArm64SimdDot) {
return RecoveryPackedWeight(dst_tensor, static_cast<int>(kernel->quant_type()), dst_tensor->data_type(),
schema::PrimitiveType_MatMulFusion, pack_info);
}
auto matmul_kernel = static_cast<kernel::MatmulDynamicBaseInt8CPUKernel *>(kernel);
matmul_kernel->SetWeightIsPacked(true);
auto index = static_cast<size_t>(pack_info->weight_sums_index_);
if (index < tensors.size()) {
matmul_kernel->SetWeightSumsTensor(tensors.at(index));
}
return lite::RET_OK;
}
int RecoveryPackedWeight(Tensor *weight, const int quant_type, const TypeId data_type, const int node_type,
PackInfo *pack_info) {
auto recovery_func = GetRecoveryWeightFunc(quant_type, data_type, node_type, pack_info->cpu_option_);
if (recovery_func == nullptr) {
MS_LOG(ERROR) << "unsupported recovery func.";
return RET_NULL_PTR;
}
void *unpack_data = malloc(weight->Size());
if (unpack_data == nullptr) {
MS_LOG(ERROR) << "malloc unpack_data failed.";
return RET_NULL_PTR;
}
void *pack_b_ptr = weight->data();
for (int i = 0; i < pack_info->b_batch_; i++) {
void *current_weight;
void *current_b_pack;
if (weight->data_type() == kNumberTypeInt8) {
current_weight = static_cast<void *>(static_cast<int8_t *>(unpack_data) + i * pack_info->deep_ * pack_info->col_);
current_b_pack =
static_cast<void *>(static_cast<int8_t *>(pack_b_ptr) + i * pack_info->col_align_ * pack_info->deep_align_);
} else {
free(unpack_data);
MS_LOG(ERROR) << "unsupported data type.";
return RET_ERROR;
}
recovery_func(current_weight, current_b_pack, pack_info->deep_, pack_info->col_, pack_info->b_transpose_);
}
weight->FreeData();
weight->set_data(unpack_data);
return RET_OK;
}
int PackKernelExec(kernel::KernelExec *kernel_exec, const std::vector<Tensor *> &tensors) {
if (kernel_exec->type() == schema::PrimitiveType_MatMulFusion) {
return PackedMatmulKernelExec(kernel_exec, tensors);
}
return RET_OK;
}
} // namespace lite
} // namespace mindspore
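
The custom attributes consumed above travel as raw bytes of a decimal string: the converter serializes each integer with std::to_string, and this pass rebuilds the string byte by byte before calling atoi. A self-contained round-trip sketch (EncodeAttr/DecodeAttr are hypothetical helpers, not the schema API):

#include <cstdint>
#include <cstdlib>
#include <string>
#include <vector>

// Encode an integer attribute the way the converter's AddCustomAttr does.
std::vector<uint8_t> EncodeAttr(int value) {
  std::string s = std::to_string(value);
  return std::vector<uint8_t>(s.begin(), s.end());
}

// Decode it the way PackedNodePass::Run does before calling std::atoi.
int DecodeAttr(const std::vector<uint8_t> &data) {
  std::string s;
  for (uint8_t b : data) {
    s.push_back(static_cast<char>(b));
  }
  return std::atoi(s.c_str());
}
// EncodeAttr(128) yields {'1', '2', '8'}; DecodeAttr round-trips it to 128.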

View File

@@ -0,0 +1,80 @@
/**
* Copyright 2023 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_LITERT_RUNTIME_PACKED_NODE_PASS_
#define MINDSPORE_LITE_SRC_LITERT_RUNTIME_PACKED_NODE_PASS_
#include <string>
#include <map>
#include <vector>
#include "src/litert/lite_model.h"
#include "src/tensor.h"
#include "src/litert/kernel_exec.h"
namespace mindspore {
namespace lite {
struct PackInfo {
bool is_packed_{false};
int weight_sums_index_;
int b_batch_;
int deep_;
int col_;
int deep_align_;
int col_align_;
bool b_transpose_;
std::string cpu_option_;
};
class PackedNodePass {
public:
static PackedNodePass &GetInstance() {
static PackedNodePass instance;
return instance;
}
PackInfo *GetNodePackInfo(const std::string &node_name) {
if (this->node_pack_info_map_.find(node_name) == this->node_pack_info_map_.end()) {
return nullptr;
}
return this->node_pack_info_map_[node_name];
}
void Run(Model *model, const std::vector<Tensor *> &tensors);
void CopyWeightBiasSumsTensor(const std::vector<Tensor *> &tensors);
protected:
void AddNodePackInfo(const std::string &node_name, PackInfo *pack_info) {
if (this->node_pack_info_map_.find(node_name) != this->node_pack_info_map_.end()) {
MS_LOG(WARNING) << "Key conflict when add weight sums index.";
}
this->node_pack_info_map_[node_name] = pack_info;
}
private:
PackedNodePass() = default;
~PackedNodePass();
private:
std::map<std::string, PackInfo *> node_pack_info_map_;
};
int PackKernelExec(kernel::KernelExec *kernel_exec, const std::vector<Tensor *> &tensors);
// packed weight data -> unpack
int RecoveryPackedWeight(Tensor *weight, const int quant_type, const TypeId data_type, const int node_type,
PackInfo *packInfo);
} // namespace lite
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_LITERT_RUNTIME_PACKED_NODE_PASS_

View File

@@ -48,6 +48,8 @@ include_directories(${TOP_DIR}/mindspore/ccsrc/plugin/device/cpu/kernel)
file(GLOB_RECURSE CONVERTER_SRC RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/ops/*.cc
${CMAKE_CURRENT_SOURCE_DIR}/converter.cc
${CMAKE_CURRENT_SOURCE_DIR}/offline_packing_optimizer.cc
${CMAKE_CURRENT_SOURCE_DIR}/converter_packed_node.cc
${CMAKE_CURRENT_SOURCE_DIR}/converter_funcgraph.cc
${CMAKE_CURRENT_SOURCE_DIR}/converter_metagraph.cc
${CMAKE_CURRENT_SOURCE_DIR}/anf_transform.cc
@@ -167,6 +169,7 @@ set(LITE_SRC ${API_SRC}
${SRC_DIR}/litert/sub_graph_split.cc
${KERNEL_ONLINE_FUSION_SRC}
${SRC_DIR}/litert/lite_session.cc
${SRC_DIR}/litert/runtime_packed_node_pass.cc
${SRC_DIR}/litert/executor.cc
${SRC_DIR}/litert/lite_model.cc
${SRC_DIR}/litert/model_manager.cc

View File

@@ -35,6 +35,7 @@ constexpr auto kDataPreprocessParam = "data_preprocess_param";
constexpr auto kRegistry = "registry";
constexpr auto kAclOptionParam = "acl_option_cfg_param";
constexpr auto kMicroParam = "micro_param";
constexpr auto kCpuOptionParam = "cpu_option_cfg_param";
} // namespace
using ShapeVector = std::vector<int64_t>;
const int kBatchDim = 0;
@@ -286,6 +287,12 @@ int ConfigFileParser::ParseConfigParam(std::map<std::string, std::map<std::strin
MS_LOG(ERROR) << "ParseWeightQuantString failed.";
return ret;
}
ret = ParseCpuOptionCfgString(*maps);
(void)maps->erase(kCpuOptionParam);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ParseCpuOptionCfgString failed.";
return ret;
}
return RET_OK;
}
@ -425,5 +432,15 @@ int ConfigFileParser::ParseWeightQuantString(const std::map<std::string, std::ma
}
return RET_OK;
}
int ConfigFileParser::ParseCpuOptionCfgString(const std::map<std::string, std::map<std::string, std::string>> &maps) {
if (maps.find(kCpuOptionParam) != maps.end()) {
const auto &map = maps.at(kCpuOptionParam);
std::map<std::string, std::string &> parse_map{{"architecture", cpu_option_cfg_string_.architecture},
{"instruction", cpu_option_cfg_string_.instruction}};
return SetMapData(map, parse_map, kCpuOptionParam);
}
return RET_OK;
}
} // namespace lite
} // namespace mindspore

View File

@@ -98,6 +98,11 @@ struct MicroParamString {
std::string enable_micro;
};
struct CpuOptionCfgString {
std::string architecture;
std::string instruction;
};
class ConfigFileParser {
public:
int ParseConfigFile(const std::string &config_file_path);
@@ -112,6 +117,7 @@ class ConfigFileParser {
RegistryInfoString GetRegistryInfoString() const { return this->registry_info_string_; }
AclOptionCfgString GetAclOptionCfgString() { return this->acl_option_cfg_string_; }
MicroParamString GetMicroParamString() { return this->micro_param_string_; }
CpuOptionCfgString GetCpuOptionCfgString() { return this->cpu_option_cfg_string_; }
private:
int ParseDataPreProcessString(const std::map<std::string, std::map<std::string, std::string>> &maps);
@@ -124,6 +130,7 @@
int SetMapData(const std::map<std::string, std::string> &input_map,
const std::map<std::string, std::string &> &parse_map, const std::string &section);
int ParseMicroParamString(const std::map<std::string, std::map<std::string, std::string>> &maps);
int ParseCpuOptionCfgString(const std::map<std::string, std::map<std::string, std::string>> &maps);
private:
DataPreProcessString data_pre_process_string_;
@@ -134,6 +141,7 @@
RegistryInfoString registry_info_string_;
AclOptionCfgString acl_option_cfg_string_;
MicroParamString micro_param_string_;
CpuOptionCfgString cpu_option_cfg_string_;
};
} // namespace lite

View File

@@ -0,0 +1,41 @@
/**
* Copyright 2023 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "tools/converter/config_parser/cpu_option_param_parser.h"
namespace mindspore {
namespace lite {
STATUS CpuOptionParamParser::ParseCpuOptionCfg(const CpuOptionCfgString &cpu_option_string,
CpuOptionCfg *cpu_option_cfg) {
if (cpu_option_string.architecture.empty() || cpu_option_string.instruction.empty()) {
return RET_OK;
}
if (cpu_option_string.architecture != "ARM64") {
MS_LOG(ERROR) << "cpu instruction only supported ARM64. But get " << cpu_option_string.architecture;
return RET_INPUT_PARAM_INVALID;
}
if (cpu_option_string.instruction != "SIMD_DOT") {
MS_LOG(ERROR) << "cpu instruction only supported SIMD_DOT. But get " << cpu_option_string.instruction;
return RET_INPUT_PARAM_INVALID;
}
cpu_option_cfg->instruction = cpu_option_string.instruction;
cpu_option_cfg->architecture = cpu_option_string.architecture;
return RET_OK;
}
} // namespace lite
} // namespace mindspore
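
For reference, the converter config section this parser consumes would look like the snippet below; the section and key names come from ConfigFileParser above, and ARM64/SIMD_DOT are the only values these checks accept:

[cpu_option_cfg_param]
architecture=ARM64
instruction=SIMD_DOT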

View File

@@ -0,0 +1,32 @@
/**
* Copyright 2023 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_CONFIG_PARSER_CPU_OPTION_PARAM_PARSER_H_
#define MINDSPORE_LITE_TOOLS_CONVERTER_CONFIG_PARSER_CPU_OPTION_PARAM_PARSER_H_
#include <string>
#include "tools/converter/cxx_api/converter_para.h"
#include "tools/converter/config_parser/config_file_parser.h"
#include "include/errorcode.h"
namespace mindspore {
namespace lite {
class CpuOptionParamParser {
public:
STATUS ParseCpuOptionCfg(const CpuOptionCfgString &cpu_option_string, CpuOptionCfg *cpu_option_cfg);
};
} // namespace lite
} // namespace mindspore
#endif // MINDSPORE_LITE_TOOLS_CONVERTER_CONFIG_PARSER_CPU_OPTION_PARAM_PARSER_H_

View File

@@ -53,6 +53,8 @@
#include "src/common/file_utils.h"
#include "ops/dynamic_shape.h"
#include "tools/common/parse_config_utils.h"
#include "tools/converter/converter_packed_node.h"
#include "tools/converter/config_parser/cpu_option_param_parser.h"
namespace mindspore {
extern "C" {
@@ -348,6 +350,13 @@ int ConverterImpl::InitConfigParam(const std::shared_ptr<ConverterPara> &param)
MS_LOG(ERROR) << "Parse micro param failed.";
return ret;
}
lite::CpuOptionParamParser cpu_param_parser;
ret = cpu_param_parser.ParseCpuOptionCfg(config_parser.GetCpuOptionCfgString(), &param->cpuOptionCfgParam);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Parse cpu option param failed.";
return ret;
}
return RET_OK;
}
@@ -817,6 +826,16 @@ int ConverterImpl::SaveGraph(FuncGraphPtr graph, const std::shared_ptr<Converter
MS_LOG(ERROR) << "Convert to meta graph failed";
return RET_ERROR;
}
if (!param->cpuOptionCfgParam.architecture.empty()) {
std::string cpu_option = param->cpuOptionCfgParam.architecture + param->cpuOptionCfgParam.instruction;
status = ConverterPackedNode(meta_graph, cpu_option);
if (status != RET_OK) {
MS_LOG(ERROR) << "save pack info failed.";
return status;
}
}
meta_graph->version = Version();
if (param->pre_infer) {

View File

@@ -55,6 +55,7 @@
#include "tools/converter/parser/unify_format.h"
#include "tools/optimizer/graph/specify_graph_input_format.h"
#include "tools/converter/anf_transform.h"
#include "tools/converter/offline_packing_optimizer.h"
namespace mindspore {
namespace lite {
@@ -311,6 +312,14 @@ STATUS ConverterFuncGraph::Optimize(const std::shared_ptr<ConverterPara> &param,
func_graph->set_attr(kIsOptimized, MakeValue(true));
}
if (!param->cpuOptionCfgParam.architecture.empty()) {
// Do offline pack.
if (OfflinePackingOptimizer().Optimize(func_graph, "ANDROID_ARM_CPU") != RET_OK) {
MS_LOG(ERROR) << "Do offline packing failed.";
return RET_ERROR;
}
}
return RET_OK;
}

View File

@@ -0,0 +1,150 @@
/**
* Copyright 2023 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <vector>
#include <memory>
#include <utility>
#include "tools/converter/converter_packed_node.h"
#include "tools/converter/offline_packing_optimizer.h"
#include "src/litert/kernel/cpu/int8/matmul_dynamic_base_int8.h"
#include "mindspore/core/ops/op_name.h"
namespace mindspore {
namespace {
constexpr auto kMatmulCustomType = "MatmulFusionPacked";
}
namespace lite {
void AddCustomAttr(std::vector<std::unique_ptr<mindspore::schema::AttributeT>> *attrs, const std::string &&key,
const std::string &&value) {
auto attr = std::make_unique<schema::AttributeT>();
attr->name = key;
std::vector<uint8_t> attr_data(value.begin(), value.end());
attr->data = attr_data;
attrs->emplace_back(std::move(attr));
}
int ReplaceMatMulFusionToCustom(schema::MetaGraphT *meta_graph, const std::unique_ptr<schema::CNodeT> &cnode,
const std::unique_ptr<mindspore::schema::TensorT> &b_input,
const std::string &cpu_option) {
auto lite_kernel = PackDataWrapper::GetInstance().GetPackedKernel(cnode->name);
if (lite_kernel == nullptr) {
MS_LOG(ERROR) << "Get Packed Kernel error.";
return RET_ERROR;
}
auto param = lite_kernel->op_parameter();
if (param == nullptr) {
MS_LOG(ERROR) << "param is nullptr.";
return RET_ERROR;
}
auto matmul_param = reinterpret_cast<MatMulParameter *>(param);
if (matmul_param->matmul_type_ == kMatmulDynamicSdotInt8Cpu) {
cnode->primitive->value.type = schema::PrimitiveType_Custom;
auto primitive = new (std::nothrow) schema::CustomT;
if (primitive == nullptr) {
MS_LOG(ERROR) << "new CustomT error.";
return RET_NULL_PTR;
}
primitive->type = kMatmulCustomType;
// activation_type
AddCustomAttr(&(primitive->attr), ops::kActivationType, std::to_string(matmul_param->act_type_));
// transpose_a
AddCustomAttr(&(primitive->attr), ops::kTransposeA, std::to_string(matmul_param->a_transpose_));
// transpose_b
AddCustomAttr(&(primitive->attr), ops::kTransposeB, std::to_string(matmul_param->b_transpose_));
// replace packed data
auto matmul_kernel = reinterpret_cast<const mindspore::kernel::MatmulDynamicBaseInt8CPUKernel *>(lite_kernel);
auto b_batch = matmul_kernel->GetBBatch();
auto pack_b_size = b_batch * matmul_param->col_align_ * matmul_param->deep_align_ * sizeof(int8_t);
b_input->data.resize(pack_b_size);
if (memcpy_s(b_input->data.data(), b_input->data.size(), matmul_kernel->GetPackBPtr(), pack_b_size) != EOK) {
delete primitive;
MS_LOG(ERROR) << "new CustomT error.";
return RET_ERROR;
}
// add weight_sums to inputs
auto weight_sum_size = b_batch * matmul_param->col_align_ * sizeof(int);
auto weight_sums_tensor = std::make_unique<schema::TensorT>();
weight_sums_tensor->nodeType = lite::NodeType_ValueNode;
weight_sums_tensor->format = schema::Format_NHWC;
weight_sums_tensor->dataType = TypeId::kNumberTypeInt32;
weight_sums_tensor->dims = {};
weight_sums_tensor->dims.emplace_back(weight_sum_size / sizeof(int));
weight_sums_tensor->data.resize(weight_sum_size);
weight_sums_tensor->name = cnode->name + "_weight_sums";
if (memcpy_s(weight_sums_tensor->data.data(), weight_sums_tensor->data.size(), matmul_kernel->GetWeightSums(),
weight_sum_size) != EOK) {
delete primitive;
MS_LOG(ERROR) << "new CustomT error.";
return RET_ERROR;
}
cnode->inputIndex.emplace_back(meta_graph->allTensors.size());
meta_graph->allTensors.emplace_back(std::move(weight_sums_tensor));
// add scalar to attr
AddCustomAttr(&(primitive->attr), "b_batch", std::to_string(b_batch));
AddCustomAttr(&(primitive->attr), "deep", std::to_string(matmul_param->deep_));
AddCustomAttr(&(primitive->attr), "col", std::to_string(matmul_param->col_));
AddCustomAttr(&(primitive->attr), "col_align", std::to_string(matmul_param->col_align_));
AddCustomAttr(&(primitive->attr), "deep_align", std::to_string(matmul_param->deep_align_));
// add cpu option
std::string cpu_option_str = cpu_option;
AddCustomAttr(&(primitive->attr), "cpu_option", std::move(cpu_option_str));
cnode->primitive->value.value = primitive;
}
return RET_OK;
}
int ConverterPackedNode(schema::MetaGraphT *meta_graph, const std::string &cpu_option) {
for (auto &dst_node : meta_graph->nodes) {
if (dst_node->primitive == nullptr || dst_node->primitive->value.type != schema::PrimitiveType_MatMulFusion) {
continue;
}
MS_CHECK_TRUE_MSG(dst_node->inputIndex.size() >= kInputSize1, RET_ERROR, "inputs size is wrong.");
auto a_index = dst_node->inputIndex[FIRST_INPUT];
MS_CHECK_TRUE_MSG(meta_graph->allTensors.size() > a_index, RET_ERROR, "allTensors size is wrong.");
auto &a_input = meta_graph->allTensors.at(a_index);
CHECK_NULL_RETURN(a_input);
auto b_index = dst_node->inputIndex[SECOND_INPUT];
MS_CHECK_TRUE_MSG(meta_graph->allTensors.size() > b_index, RET_ERROR, "allTensors size is wrong.");
auto &b_input = meta_graph->allTensors.at(b_index);
CHECK_NULL_RETURN(b_input);
if (a_input->dataType != b_input->dataType) {
MS_LOG(ERROR) << "inputs dataType is not same." << a_input->dataType << " " << b_input->dataType;
return RET_ERROR;
}
if (b_input->data.empty()) {
continue;
}
auto ret = ReplaceMatMulFusionToCustom(meta_graph, dst_node, b_input, cpu_option);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ReplaceMatmulToCustom error.";
return ret;
}
}
return RET_OK;
}
} // namespace lite
} // namespace mindspore
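
Putting the pieces together, the node that ReplaceMatMulFusionToCustom emits and PackedNodePass::Run later consumes looks roughly like this (an illustrative summary of the code above, not a tool dump):

// primitive : Custom, type = "MatmulFusionPacked"
// attrs     (decimal-string bytes): activation_type, transpose_a, transpose_b,
//           b_batch, deep, col, col_align, deep_align, cpu_option
//           (cpu_option = architecture + instruction, e.g. "ARM64SIMD_DOT")
// inputs    : [a, b (data replaced by the packed int8 buffer), ...,
//              <node>_weight_sums (int32, dims = {b_batch * col_align})]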

View File

@@ -0,0 +1,29 @@
/**
* Copyright 2023 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_CONVERT_PACKED_NODE_H
#define MINDSPORE_LITE_TOOLS_CONVERTER_CONVERT_PACKED_NODE_H
#include <string>
#include "schema/inner/model_generated.h"
namespace mindspore {
namespace lite {
int ConverterPackedNode(schema::MetaGraphT *meta_graph, const std::string &cpu_option);
} // namespace lite
} // namespace mindspore
#endif // MINDSPORE_LITE_TOOLS_CONVERTER_CONVERT_PACKED_NODE_H

View File

@@ -35,6 +35,11 @@ struct ParallelSplitConfig {
std::vector<std::string> parallel_devices_;
};
struct CpuOptionCfg {
std::string architecture;
std::string instruction;
};
struct ConverterPara {
converter::FmkType fmk_type;
std::string model_file;
@@ -73,6 +78,7 @@ struct ConverterPara {
lite::micro::MicroParam microParam;
ParallelSplitConfig parallel_split_config;
std::string device;
CpuOptionCfg cpuOptionCfgParam;
};
} // namespace mindspore
#endif // MINDSPORE_LITE_TOOLS_CONVERTER_CXX_API_CONVERTER_PARA_H_

View File

@@ -33,7 +33,7 @@ STATUS MatmulPacking(const mindspore::CNodePtr &cnode_ptr, const FuncGraphPtr &f
const lite::InnerContext *ctx);
mindspore::lite::InnerContext *InitInnerContextForAndroidArmCpu();
enum class BackendType : uint {
enum class BackendType : uint8_t {
kUnknownBackend = 0,
kAndroidArmCpuBackend,
};