forked from mindspore-Ecosystem/mindspore

pack optimize

parent 985c48e543
commit ac7b243a5f
src/CMakeLists.txt
@@ -142,6 +142,7 @@ set(LITE_SRC
     ${CMAKE_CURRENT_SOURCE_DIR}/litert/sub_graph_kernel.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/litert/scheduler.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/litert/lite_session.cc
+    ${CMAKE_CURRENT_SOURCE_DIR}/litert/runtime_packed_node_pass.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/litert/model_manager.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/errorcode.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/litert/cpu_info.cc
src/litert/kernel/cpu/int8/matmul_dynamic_base_int8.cc
@@ -125,7 +125,7 @@ void MatmulDynamicBaseInt8CPUKernel::FreeTmpBuffer() {
     free(pack_a_ptr_);
     pack_a_ptr_ = nullptr;
   }
-  if (pack_b_ptr_ != nullptr) {
+  if (pack_b_ptr_ != nullptr && !weight_is_packed_) {
     free(pack_b_ptr_);
     pack_b_ptr_ = nullptr;
   }
@@ -133,7 +133,7 @@ void MatmulDynamicBaseInt8CPUKernel::FreeTmpBuffer() {
     free(input_sums_);
     input_sums_ = nullptr;
   }
-  if (weight_sums_ != nullptr) {
+  if (weight_sums_ != nullptr && !weight_is_packed_) {
     free(weight_sums_);
     weight_sums_ = nullptr;
   }
@@ -162,6 +162,12 @@ int MatmulDynamicBaseInt8CPUKernel::InitInputQuantParam() {
 }
 
 int MatmulDynamicBaseInt8CPUKernel::TransferB() {
+  if (weight_is_packed_) {
+    CHECK_NULL_RETURN(weight_sums_tensor_);
+    pack_b_ptr_ = static_cast<int8_t *>(in_tensors_.at(kWeightIndex)->data());
+    weight_sums_ = static_cast<int *>(weight_sums_tensor_->data());
+    return RET_OK;
+  }
   auto weight_data = reinterpret_cast<int8_t *>(in_tensors_.at(kWeightIndex)->data());
   CHECK_NULL_RETURN(weight_data);
   for (int i = 0; i < b_batch_; i++) {
@@ -177,6 +183,7 @@ int MatmulDynamicBaseInt8CPUKernel::TransferB() {
       CalcWeightSums(current_weight, param_->deep_, param_->col_, current_sums, RowMajor);
     }
   }
+
   return RET_OK;
 }
 
@@ -205,6 +212,10 @@ int MatmulDynamicBaseInt8CPUKernel::InitMatrixABuffer() {
 }
 
 int MatmulDynamicBaseInt8CPUKernel::InitMatrixBBuffer() {
+  if (weight_is_packed_) {
+    return RET_OK;
+  }
+
   if (pack_b_ptr_ != nullptr) {
     free(pack_b_ptr_);
     pack_b_ptr_ = nullptr;
src/litert/kernel/cpu/int8/matmul_dynamic_base_int8.h
@@ -42,6 +42,12 @@ class MatmulDynamicBaseInt8CPUKernel : public LiteKernel {
   static int InitBroadcastParams(const std::vector<int> &a_shape_const, const std::vector<int> &b_shape_const,
                                  MatMulParameter *params, std::vector<int> *a_offsets, std::vector<int> *b_offsets);
 
+  const int8_t *GetPackBPtr() const { return pack_b_ptr_; }
+  const int *GetWeightSums() const { return weight_sums_; }
+  const int GetBBatch() const { return b_batch_; }
+  void SetWeightIsPacked(bool weight_is_packed) { this->weight_is_packed_ = weight_is_packed; }
+  void SetWeightSumsTensor(lite::Tensor *weight_sums_tensor) { this->weight_sums_tensor_ = weight_sums_tensor; }
+
  private:
   void ResizeMatrixBParameter();
   int CopyBias();
@@ -90,6 +96,8 @@ class MatmulDynamicBaseInt8CPUKernel : public LiteKernel {
   int thread_stride_ = 0;
   bool enable_fp16_ = false;
   PackFunc b_pack_func_ = nullptr;
+  bool weight_is_packed_ = false;
+  lite::Tensor *weight_sums_tensor_ = nullptr;
 };
 }  // namespace mindspore::kernel
 
src/litert/lite_session.cc
@@ -62,6 +62,7 @@
 #include "kernel/ascend/plugin/ascend_kernel_plugin.h"
 #endif
 #include "thread/parallel_thread_pool_manager.h"
+#include "src/litert/runtime_packed_node_pass.h"
 
 using AbstractBaseModel = mindspore::infer::AbstractBaseModel;
 
@@ -585,6 +586,8 @@ int LiteSession::CompileGraph(Model *model) {
   InitGraphInputTensors(model);
   InitGraphOutputTensors(model);
 
+  PackedNodePass::GetInstance().Run(model, tensors_);
+
   // scheduler kernels
   Scheduler scheduler(context_.get(), ms_context_, model, &tensors_, &inputs_, &outputs_, is_train_session_,
                       &is_infershape_, &is_control_flow_, &infer_along_running_, execution_plan_, delegate_,
@@ -698,6 +701,11 @@ int LiteSession::PrepareKernels(const Model *model) {
       return RET_ERROR;
     }
     for (auto &node : subgraph_kernel->nodes()) {
+      ret = PackKernelExec(node, tensors_);
+      if (ret != RET_OK) {
+        MS_LOG(ERROR) << "Pack KernelExec failed.";
+        return ret;
+      }
       ret = node->Prepare();
       if (ret != RET_OK) {
         MS_LOG(ERROR) << "node: " << node->name() << " prepare failed.";
src/litert/runtime_packed_node_pass.cc (new file)
@@ -0,0 +1,261 @@
/**
 * Copyright 2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "src/litert/runtime_packed_node_pass.h"
#include "nnacl/op_base.h"
#include "src/litert/kernel/cpu/int8/matmul_dynamic_base_int8.h"

using RecoveryWeightFunc = void (*)(void *, void *, int, int, bool);
namespace mindspore {
namespace {
constexpr size_t kFlatbuffersBuilderInitSize = 1024;
constexpr auto kActivationType = "activation_type";
constexpr auto kTransposeA = "transpose_a";
constexpr auto kTransposeB = "transpose_b";
constexpr auto kArm64SimdDot = "ARM64SIMD_DOT";
}  // namespace

namespace lite {
PackedNodePass::~PackedNodePass() {
  for (auto &pack_info : node_pack_info_map_) {
    delete pack_info.second;
  }
  node_pack_info_map_.clear();
}

void PackedNodePass::Run(Model *model, const std::vector<Tensor *> &tensors) {
  for (auto &node : model->graph_.all_nodes_) {
    MS_ASSERT(node != nullptr);
    if (node->node_type_ != schema::PrimitiveType_Custom) {
      continue;
    }
    auto *primitive = reinterpret_cast<const schema::Primitive *>(node->primitive_);
    if (primitive == nullptr) {
      MS_LOG(ERROR) << "Op " << node->name_ << " should exist in model!";
      return;
    }
    auto custom = primitive->value_as_Custom();
    if (custom == nullptr || custom->type() == nullptr) {
      MS_LOG(ERROR) << "Custom node is nullptr";
      return;
    }
    auto custom_type = custom->type()->str();
    if (custom_type != "MatmulFusionPacked") {
      continue;
    }
    flatbuffers::FlatBufferBuilder fbb(kFlatbuffersBuilderInitSize);

    auto custom_attr = custom->attr();
    std::map<std::string, std::string> attr_map;
    for (size_t i = 0; i < custom_attr->size(); ++i) {
      auto attr = custom_attr->Get(i);
      auto attr_key = attr->name()->str();
      auto data_bytes = attr->data();
      int data_size = static_cast<int>(data_bytes->size());
      std::string attr_value;
      for (int j = 0; j < data_size; j++) {
        attr_value.push_back(static_cast<char>(data_bytes->Get(j)));
      }
      attr_map[attr_key] = attr_value;
    }
    if (attr_map.find(kActivationType) == attr_map.end() || attr_map.find(kTransposeA) == attr_map.end() ||
        attr_map.find(kTransposeB) == attr_map.end()) {
      MS_LOG(ERROR) << "Custom attr error.";
      return;
    }
    auto val_offset = schema::CreateMatMulFusion(
      fbb, std::atoi(attr_map[kTransposeA].c_str()), std::atoi(attr_map[kTransposeB].c_str()),
      static_cast<schema::ActivationType>(std::atoi(attr_map[kActivationType].c_str())));
    auto prim_offset = schema::CreatePrimitive(fbb, schema::PrimitiveType_MatMulFusion, val_offset.o);
    fbb.Finish(prim_offset);
    void *prim = malloc(fbb.GetSize());
    if (prim == nullptr) {
      MS_LOG(ERROR) << "malloc primitive failed.";
      return;
    }
    memcpy(prim, fbb.GetBufferPointer(), fbb.GetSize());
    auto custom_primitive = flatbuffers::GetRoot<schema::Primitive>(prim);
    fbb.Clear();
    PackInfo *pack_info = new (std::nothrow) PackInfo();
    if (pack_info == nullptr) {
      free(prim);
      MS_LOG(ERROR) << "new PackInfo failed.";
      return;
    }
    node->primitive_ = custom_primitive;
    pack_info->is_packed_ = true;
    pack_info->weight_sums_index_ = node->input_indices_.back();
    pack_info->b_batch_ = std::atoi(attr_map["b_batch"].c_str());
    pack_info->col_ = std::atoi(attr_map["col"].c_str());
    pack_info->deep_ = std::atoi(attr_map["deep"].c_str());
    pack_info->col_align_ = std::atoi(attr_map["col_align"].c_str());
    pack_info->deep_align_ = std::atoi(attr_map["deep_align"].c_str());
    pack_info->b_transpose_ = std::atoi(attr_map[kTransposeB].c_str());
    pack_info->cpu_option_ = attr_map["cpu_option"];
    AddNodePackInfo(node->name_, pack_info);
    node->input_indices_.pop_back();
    node->node_type_ = schema::PrimitiveType_MatMulFusion;
  }

  if (!(reinterpret_cast<lite::LiteModel *>(model)->keep_model_buf())) {
    CopyWeightBiasSumsTensor(tensors);
  }
}

void PackedNodePass::CopyWeightBiasSumsTensor(const std::vector<Tensor *> &tensors) {
  for (auto &pack_info : node_pack_info_map_) {
    auto index = static_cast<size_t>(pack_info.second->weight_sums_index_);
    if (index > tensors.size()) {
      return;
    }
    auto tensor = tensors[index];
    if (!tensor->IsConst() && tensor->data() != nullptr) {
      return;
    }
    if (!tensor->IsConst() || tensor->own_data()) {
      continue;
    }
    if (tensor->data_type() == kObjectTypeTensorType) {
      MS_ASSERT(tensor->data() == nullptr);
    } else {
      auto copy_tensor = Tensor::CopyTensor(*tensor, true);
      if (copy_tensor == nullptr) {
        MS_LOG(ERROR) << "Copy tensor failed";
        return;
      }
      tensor->FreeData();
      tensor->set_data(copy_tensor->data());
      tensor->set_own_data(true);
      copy_tensor->set_data(nullptr);
      delete copy_tensor;
    }
  }
}

void MatmulDynamicSdotInt8Cpu(void *src, void *dst, int row, int col, bool transpose) {
  auto src_int8 = static_cast<int8_t *>(src);
  auto dst_int8 = static_cast<int8_t *>(dst);
  if (!transpose) {
    // RowMajor2Col4x16MajorInt8
    int row_4 = UP_ROUND(row, C4NUM);
    int stride = C16NUM * C4NUM;
    for (int r = 0; r < row_4; ++r) {
      for (int c = 0; c < col; ++c) {
        int stride_idx = c / C16NUM * (row_4 / C4NUM) + r / C4NUM;
        if (r < row) {
          int src_idx = r * col + c;
          src_int8[src_idx] = dst_int8[stride * stride_idx + c % C16NUM * C4NUM + r % C4NUM];
        }
      }
    }
  } else {
    int temp = row;
    row = col;
    col = temp;
    // RowMajor2Row4x16MajorInt8
    int col4 = UP_ROUND(col, C4NUM);
    for (int r = 0; r < row; r++) {
      int rd16 = r / C16NUM;
      int rm16 = r % C16NUM;
      for (int c = 0; c < col; c++) {
        int cd4 = c / C4NUM;
        int cm4 = c % C4NUM;
        int dst_index = rd16 * col4 * C16NUM + cd4 * C16NUM * C4NUM + rm16 * C4NUM + cm4;
        int src_index = r * col + c;
        src_int8[src_index] = dst_int8[dst_index];
      }
    }
  }
}

RecoveryWeightFunc GetRecoveryWeightFunc(const int quant_type, const TypeId data_type, const int node_type,
                                         const std::string &cpu_option) {
  if (cpu_option == kArm64SimdDot && node_type == schema::PrimitiveType_MatMulFusion &&
      quant_type == schema::QuantType_QUANT_DYNAMIC && data_type == kNumberTypeInt8) {
    return MatmulDynamicSdotInt8Cpu;
  }
  return nullptr;
}

int PackedMatmulKernelExec(kernel::KernelExec *kernel_exec, const std::vector<Tensor *> &tensors) {
  auto pack_info = PackedNodePass::GetInstance().GetNodePackInfo(kernel_exec->name());
  if (pack_info == nullptr) {
    return RET_OK;
  }
  MS_CHECK_TRUE_MSG(kernel_exec->in_tensors().size() >= kInputSize1, lite::RET_ERROR,
                    "kernel doesn't have weight tensor.");
  auto dst_tensor = kernel_exec->in_tensors()[SECOND_INPUT];
  auto kernel = kernel_exec->kernel();
  MS_CHECK_TRUE_MSG(kernel != nullptr, lite::RET_NULL_PTR, "kernel is nullptr.");
  auto param = reinterpret_cast<MatMulParameter *>(kernel_exec->op_parameter());
  if (dst_tensor->data_type() != kNumberTypeInt8 || kernel->quant_type() != schema::QuantType_QUANT_DYNAMIC) {
    return RecoveryPackedWeight(dst_tensor, static_cast<int>(kernel->quant_type()), dst_tensor->data_type(),
                                schema::PrimitiveType_MatMulFusion, pack_info);
  }

  if (param->matmul_type_ != kMatmulDynamicSdotInt8Cpu && pack_info->cpu_option_ == kArm64SimdDot) {
    return RecoveryPackedWeight(dst_tensor, static_cast<int>(kernel->quant_type()), dst_tensor->data_type(),
                                schema::PrimitiveType_MatMulFusion, pack_info);
  }
  auto matmul_kernel = static_cast<kernel::MatmulDynamicBaseInt8CPUKernel *>(kernel);
  matmul_kernel->SetWeightIsPacked(true);
  auto index = static_cast<size_t>(pack_info->weight_sums_index_);
  if (index < tensors.size()) {
    matmul_kernel->SetWeightSumsTensor(tensors.at(index));
  }

  return lite::RET_OK;
}

int RecoveryPackedWeight(Tensor *weight, const int quant_type, const TypeId data_type, const int node_type,
                         PackInfo *pack_info) {
  auto recovery_func = GetRecoveryWeightFunc(quant_type, data_type, node_type, pack_info->cpu_option_);
  if (recovery_func == nullptr) {
    MS_LOG(ERROR) << "unsupported recovery func.";
    return RET_NULL_PTR;
  }
  void *unpack_data = malloc(weight->Size());
  if (unpack_data == nullptr) {
    MS_LOG(ERROR) << "malloc unpack_data failed.";
    return RET_NULL_PTR;
  }
  void *pack_b_ptr = weight->data();
  for (int i = 0; i < pack_info->b_batch_; i++) {
    void *current_weight;
    void *current_b_pack;
    if (weight->data_type() == kNumberTypeInt8) {
      current_weight = static_cast<void *>(static_cast<int8_t *>(unpack_data) + i * pack_info->deep_ * pack_info->col_);
      current_b_pack =
        static_cast<void *>(static_cast<int8_t *>(pack_b_ptr) + i * pack_info->col_align_ * pack_info->deep_align_);
    } else {
      free(unpack_data);
      MS_LOG(ERROR) << "unsupported data type.";
      return RET_ERROR;
    }
    recovery_func(current_weight, current_b_pack, pack_info->deep_, pack_info->col_, pack_info->b_transpose_);
  }
  weight->FreeData();
  weight->set_data(unpack_data);
  return RET_OK;
}

int PackKernelExec(kernel::KernelExec *kernel_exec, const std::vector<Tensor *> &tensors) {
  if (kernel_exec->type() == schema::PrimitiveType_MatMulFusion) {
    return PackedMatmulKernelExec(kernel_exec, tensors);
  }
  return RET_OK;
}
}  // namespace lite
}  // namespace mindspore
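Note: the transpose == false branch above is the inverse of a Col4x16-blocked weight layout: element (r, c) of the row-major matrix lives at offset 64 * (c/16 * (row_4/4) + r/4) + (c % 16) * 4 + (r % 4) in the packed buffer. A standalone round-trip sketch of that mapping (not part of the commit; C4NUM = 4 and C16NUM = 16 are assumed to match nnacl/op_base.h):

#include <cstdint>
#include <cstdio>
#include <vector>

constexpr int C4NUM = 4;
constexpr int C16NUM = 16;
static int UpRound(int x, int n) { return ((x + n - 1) / n) * n; }

// Offset of row-major element (r, c) inside the Col4x16-major buffer;
// the same index formula the recovery loop in MatmulDynamicSdotInt8Cpu uses.
static int PackedIndex(int r, int c, int row4) {
  int stride = C16NUM * C4NUM;  // one 4x16 tile holds 64 bytes
  int stride_idx = c / C16NUM * (row4 / C4NUM) + r / C4NUM;
  return stride * stride_idx + c % C16NUM * C4NUM + r % C4NUM;
}

int main() {
  const int row = 6, col = 20;  // deliberately not multiples of 4 / 16
  const int row4 = UpRound(row, C4NUM);
  const int col16 = UpRound(col, C16NUM);
  std::vector<int8_t> src(row * col), packed(row4 * col16, 0), back(row * col);
  for (int i = 0; i < row * col; ++i) src[i] = static_cast<int8_t>(i);
  // Pack: forward application of the layout formula.
  for (int r = 0; r < row; ++r)
    for (int c = 0; c < col; ++c) packed[PackedIndex(r, c, row4)] = src[r * col + c];
  // Recover: what RecoveryPackedWeight does per batch when it must unpack.
  for (int r = 0; r < row; ++r)
    for (int c = 0; c < col; ++c) back[r * col + c] = packed[PackedIndex(r, c, row4)];
  printf("round trip %s\n", src == back ? "ok" : "mismatch");
  return 0;
}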
src/litert/runtime_packed_node_pass.h (new file)
@@ -0,0 +1,80 @@
/**
 * Copyright 2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_LITERT_RUNTIME_PACKED_NODE_PASS_
#define MINDSPORE_LITE_SRC_LITERT_RUNTIME_PACKED_NODE_PASS_

#include <string>
#include <map>
#include <vector>
#include "src/litert/lite_model.h"
#include "src/tensor.h"
#include "src/litert/kernel_exec.h"

namespace mindspore {
namespace lite {
struct PackInfo {
  bool is_packed_{false};
  int weight_sums_index_;
  int b_batch_;
  int deep_;
  int col_;
  int deep_align_;
  int col_align_;
  bool b_transpose_;
  std::string cpu_option_;
};

class PackedNodePass {
 public:
  static PackedNodePass &GetInstance() {
    static PackedNodePass instance;
    return instance;
  }

  PackInfo *GetNodePackInfo(const std::string &node_name) {
    if (this->node_pack_info_map_.find(node_name) == this->node_pack_info_map_.end()) {
      return nullptr;
    }
    return this->node_pack_info_map_[node_name];
  }
  void Run(Model *model, const std::vector<Tensor *> &tensors);
  void CopyWeightBiasSumsTensor(const std::vector<Tensor *> &tensors);

 protected:
  void AddNodePackInfo(const std::string &node_name, PackInfo *pack_info) {
    if (this->node_pack_info_map_.find(node_name) != this->node_pack_info_map_.end()) {
      MS_LOG(WARNING) << "Key conflict when add weight sums index.";
    }
    this->node_pack_info_map_[node_name] = pack_info;
  }

 private:
  PackedNodePass() = default;
  ~PackedNodePass();

 private:
  std::map<std::string, PackInfo *> node_pack_info_map_;
};

int PackKernelExec(kernel::KernelExec *kernel_exec, const std::vector<Tensor *> &tensors);

// packed weight data -> unpack
int RecoveryPackedWeight(Tensor *weight, const int quant_type, const TypeId data_type, const int node_type,
                         PackInfo *packInfo);
}  // namespace lite
}  // namespace mindspore
#endif  // MINDSPORE_LITE_SRC_LITERT_RUNTIME_PACKED_NODE_PASS_
tools/converter/CMakeLists.txt
@@ -48,6 +48,8 @@ include_directories(${TOP_DIR}/mindspore/ccsrc/plugin/device/cpu/kernel)
 file(GLOB_RECURSE CONVERTER_SRC RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
     ${CMAKE_CURRENT_SOURCE_DIR}/ops/*.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/converter.cc
+    ${CMAKE_CURRENT_SOURCE_DIR}/offline_packing_optimizer.cc
+    ${CMAKE_CURRENT_SOURCE_DIR}/converter_packed_node.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/converter_funcgraph.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/converter_metagraph.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/anf_transform.cc
@@ -167,6 +169,7 @@ set(LITE_SRC ${API_SRC}
     ${SRC_DIR}/litert/sub_graph_split.cc
     ${KERNEL_ONLINE_FUSION_SRC}
     ${SRC_DIR}/litert/lite_session.cc
+    ${SRC_DIR}/litert/runtime_packed_node_pass.cc
     ${SRC_DIR}/litert/executor.cc
     ${SRC_DIR}/litert/lite_model.cc
     ${SRC_DIR}/litert/model_manager.cc
tools/converter/config_parser/config_file_parser.cc
@@ -35,6 +35,7 @@ constexpr auto kDataPreprocessParam = "data_preprocess_param";
 constexpr auto kRegistry = "registry";
 constexpr auto kAclOptionParam = "acl_option_cfg_param";
 constexpr auto kMicroParam = "micro_param";
+constexpr auto kCpuOptionParam = "cpu_option_cfg_param";
 }  // namespace
 using ShapeVector = std::vector<int64_t>;
 const int kBatchDim = 0;
@@ -286,6 +287,12 @@ int ConfigFileParser::ParseConfigParam(std::map<std::string, std::map<std::string, std::string>> *maps) {
     MS_LOG(ERROR) << "ParseWeightQuantString failed.";
     return ret;
   }
+  ret = ParseCpuOptionCfgString(*maps);
+  (void)maps->erase(kCpuOptionParam);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "ParseCpuOptionCfgString failed.";
+    return ret;
+  }
   return RET_OK;
 }
 
@@ -425,5 +432,15 @@ int ConfigFileParser::ParseWeightQuantString(const std::map<std::string, std::map<std::string, std::string>> &maps) {
   }
   return RET_OK;
 }
+
+int ConfigFileParser::ParseCpuOptionCfgString(const std::map<std::string, std::map<std::string, std::string>> &maps) {
+  if (maps.find(kCpuOptionParam) != maps.end()) {
+    const auto &map = maps.at(kCpuOptionParam);
+    std::map<std::string, std::string &> parse_map{{"architecture", cpu_option_cfg_string_.architecture},
+                                                   {"instruction", cpu_option_cfg_string_.instruction}};
+    return SetMapData(map, parse_map, kCpuOptionParam);
+  }
+  return RET_OK;
+}
 }  // namespace lite
 }  // namespace mindspore
tools/converter/config_parser/config_file_parser.h
@@ -98,6 +98,11 @@ struct MicroParamString {
   std::string enable_micro;
 };
 
+struct CpuOptionCfgString {
+  std::string architecture;
+  std::string instruction;
+};
+
 class ConfigFileParser {
  public:
   int ParseConfigFile(const std::string &config_file_path);
@@ -112,6 +117,7 @@ class ConfigFileParser {
   RegistryInfoString GetRegistryInfoString() const { return this->registry_info_string_; }
   AclOptionCfgString GetAclOptionCfgString() { return this->acl_option_cfg_string_; }
   MicroParamString GetMicroParamString() { return this->micro_param_string_; }
+  CpuOptionCfgString GetCpuOptionCfgString() { return this->cpu_option_cfg_string_; }
 
  private:
   int ParseDataPreProcessString(const std::map<std::string, std::map<std::string, std::string>> &maps);
@@ -124,6 +130,7 @@ class ConfigFileParser {
   int SetMapData(const std::map<std::string, std::string> &input_map,
                  const std::map<std::string, std::string &> &parse_map, const std::string &section);
   int ParseMicroParamString(const std::map<std::string, std::map<std::string, std::string>> &maps);
+  int ParseCpuOptionCfgString(const std::map<std::string, std::map<std::string, std::string>> &maps);
 
  private:
   DataPreProcessString data_pre_process_string_;
@@ -134,6 +141,7 @@ class ConfigFileParser {
   RegistryInfoString registry_info_string_;
   AclOptionCfgString acl_option_cfg_string_;
   MicroParamString micro_param_string_;
+  CpuOptionCfgString cpu_option_cfg_string_;
 };
 
 }  // namespace lite
tools/converter/config_parser/cpu_option_param_parser.cc (new file)
@@ -0,0 +1,41 @@
/**
 * Copyright 2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "tools/converter/config_parser/cpu_option_param_parser.h"

namespace mindspore {
namespace lite {
STATUS CpuOptionParamParser::ParseCpuOptionCfg(const CpuOptionCfgString &cpu_option_string,
                                               CpuOptionCfg *cpu_option_cfg) {
  if (cpu_option_string.architecture.empty() || cpu_option_string.instruction.empty()) {
    return RET_OK;
  }

  if (cpu_option_string.architecture != "ARM64") {
    MS_LOG(ERROR) << "cpu instruction only supported ARM64. But get " << cpu_option_string.architecture;
    return RET_INPUT_PARAM_INVALID;
  }

  if (cpu_option_string.instruction != "SIMD_DOT") {
    MS_LOG(ERROR) << "cpu instruction only supported SIMD_DOT. But get " << cpu_option_string.instruction;
    return RET_INPUT_PARAM_INVALID;
  }
  cpu_option_cfg->instruction = cpu_option_string.instruction;
  cpu_option_cfg->architecture = cpu_option_string.architecture;
  return RET_OK;
}
}  // namespace lite
}  // namespace mindspore
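For reference, the converter config section this parser validates would look like the following (a sketch inferred from the keys and accepted values above; the section name comes from kCpuOptionParam in config_file_parser.cc):

[cpu_option_cfg_param]
architecture=ARM64
instruction=SIMD_DOT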
tools/converter/config_parser/cpu_option_param_parser.h (new file)
@@ -0,0 +1,32 @@
/**
 * Copyright 2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_CONFIG_PARSER_CPU_OPTION_PARAM_PARSER_H_
#define MINDSPORE_LITE_TOOLS_CONVERTER_CONFIG_PARSER_CPU_OPTION_PARAM_PARSER_H_
#include <string>
#include "tools/converter/cxx_api/converter_para.h"
#include "tools/converter/config_parser/config_file_parser.h"
#include "include/errorcode.h"

namespace mindspore {
namespace lite {
class CpuOptionParamParser {
 public:
  STATUS ParseCpuOptionCfg(const CpuOptionCfgString &cpu_option_string, CpuOptionCfg *cpu_option_cfg);
};
}  // namespace lite
}  // namespace mindspore
#endif  // MINDSPORE_LITE_TOOLS_CONVERTER_CONFIG_PARSER_CPU_OPTION_PARAM_PARSER_H_
tools/converter/converter.cc
@@ -53,6 +53,8 @@
 #include "src/common/file_utils.h"
 #include "ops/dynamic_shape.h"
 #include "tools/common/parse_config_utils.h"
+#include "tools/converter/converter_packed_node.h"
+#include "tools/converter/config_parser/cpu_option_param_parser.h"
 
 namespace mindspore {
 extern "C" {
@@ -348,6 +350,13 @@ int ConverterImpl::InitConfigParam(const std::shared_ptr<ConverterPara> &param) {
     MS_LOG(ERROR) << "Parse micro param failed.";
     return ret;
   }
+
+  lite::CpuOptionParamParser cpu_param_parser;
+  ret = cpu_param_parser.ParseCpuOptionCfg(config_parser.GetCpuOptionCfgString(), &param->cpuOptionCfgParam);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Parse cpu option param failed.";
+    return ret;
+  }
   return RET_OK;
 }
 
@@ -817,6 +826,16 @@ int ConverterImpl::SaveGraph(FuncGraphPtr graph, const std::shared_ptr<ConverterPara> &param) {
     MS_LOG(ERROR) << "Convert to meta graph failed";
     return RET_ERROR;
   }
+
+  if (!param->cpuOptionCfgParam.architecture.empty()) {
+    std::string cpu_option = param->cpuOptionCfgParam.architecture + param->cpuOptionCfgParam.instruction;
+    status = ConverterPackedNode(meta_graph, cpu_option);
+    if (status != RET_OK) {
+      MS_LOG(ERROR) << "save pack info failed.";
+      return status;
+    }
+  }
+
   meta_graph->version = Version();
 
   if (param->pre_infer) {
tools/converter/converter_funcgraph.cc
@@ -55,6 +55,7 @@
 #include "tools/converter/parser/unify_format.h"
 #include "tools/optimizer/graph/specify_graph_input_format.h"
 #include "tools/converter/anf_transform.h"
+#include "tools/converter/offline_packing_optimizer.h"
 
 namespace mindspore {
 namespace lite {
@@ -311,6 +312,14 @@ STATUS ConverterFuncGraph::Optimize(const std::shared_ptr<ConverterPara> &param,
     func_graph->set_attr(kIsOptimized, MakeValue(true));
   }
 
+  if (!param->cpuOptionCfgParam.architecture.empty()) {
+    // Do offline pack.
+    if (OfflinePackingOptimizer().Optimize(func_graph, "ANDROID_ARM_CPU") != RET_OK) {
+      MS_LOG(ERROR) << "Do offline packing failed.";
+      return status;
+    }
+  }
+
   return RET_OK;
 }
 
tools/converter/converter_packed_node.cc (new file)
@@ -0,0 +1,150 @@
/**
 * Copyright 2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <vector>
#include <memory>
#include <utility>
#include "tools/converter/converter_packed_node.h"
#include "tools/converter/offline_packing_optimizer.h"
#include "src/litert/kernel/cpu/int8/matmul_dynamic_base_int8.h"
#include "mindspore/core/ops/op_name.h"

namespace mindspore {
namespace {
constexpr auto kMatmulCustomType = "MatmulFusionPacked";
}

namespace lite {
void AddCustomAttr(std::vector<std::unique_ptr<mindspore::schema::AttributeT>> *attrs, const std::string &&key,
                   const std::string &&value) {
  auto attr = std::make_unique<schema::AttributeT>();
  attr->name = key;
  std::vector<uint8_t> attr_data(value.begin(), value.end());
  attr->data = attr_data;
  attrs->emplace_back(std::move(attr));
}

int ReplaceMatMulFusionToCustom(schema::MetaGraphT *meta_graph, const std::unique_ptr<schema::CNodeT> &cnode,
                                const std::unique_ptr<mindspore::schema::TensorT> &b_input,
                                const std::string &cpu_option) {
  auto lite_kernel = PackDataWrapper::GetInstance().GetPackedKernel(cnode->name);
  if (lite_kernel == nullptr) {
    MS_LOG(ERROR) << "Get Packed Kernel error.";
    return RET_ERROR;
  }
  auto param = lite_kernel->op_parameter();
  if (param == nullptr) {
    MS_LOG(ERROR) << "param is nullptr.";
    return RET_ERROR;
  }
  auto matmul_param = reinterpret_cast<MatMulParameter *>(param);
  if (matmul_param->matmul_type_ == kMatmulDynamicSdotInt8Cpu) {
    cnode->primitive->value.type = schema::PrimitiveType_Custom;
    auto primitive = new (std::nothrow) schema::CustomT;
    if (primitive == nullptr) {
      MS_LOG(ERROR) << "new CustomT error.";
      return RET_NULL_PTR;
    }
    primitive->type = kMatmulCustomType;

    // activation_type
    AddCustomAttr(&(primitive->attr), ops::kActivationType, std::to_string(matmul_param->act_type_));
    // transpose_a
    AddCustomAttr(&(primitive->attr), ops::kTransposeA, std::to_string(matmul_param->a_transpose_));
    // transpose_b
    AddCustomAttr(&(primitive->attr), ops::kTransposeB, std::to_string(matmul_param->b_transpose_));

    // replace packed data
    auto matmul_kernel = reinterpret_cast<const mindspore::kernel::MatmulDynamicBaseInt8CPUKernel *>(lite_kernel);
    auto b_batch = matmul_kernel->GetBBatch();
    auto pack_b_size = b_batch * matmul_param->col_align_ * matmul_param->deep_align_ * sizeof(int8_t);
    b_input->data.resize(pack_b_size);
    if (memcpy_s(b_input->data.data(), b_input->data.size(), matmul_kernel->GetPackBPtr(), pack_b_size) != EOK) {
      delete primitive;
      MS_LOG(ERROR) << "new CustomT error.";
      return RET_ERROR;
    }

    // add weight_sums to inputs
    auto weight_sum_size = b_batch * matmul_param->col_align_ * sizeof(int);
    auto weight_sums_tensor = std::make_unique<schema::TensorT>();
    weight_sums_tensor->nodeType = lite::NodeType_ValueNode;
    weight_sums_tensor->format = schema::Format_NHWC;
    weight_sums_tensor->dataType = TypeId::kNumberTypeInt32;
    weight_sums_tensor->dims = {};
    weight_sums_tensor->dims.emplace_back(weight_sum_size / sizeof(int));
    weight_sums_tensor->data.resize(weight_sum_size);
    weight_sums_tensor->name = cnode->name + "_weight_sums";
    if (memcpy_s(weight_sums_tensor->data.data(), weight_sums_tensor->data.size(), matmul_kernel->GetWeightSums(),
                 weight_sum_size) != EOK) {
      delete primitive;
      MS_LOG(ERROR) << "new CustomT error.";
      return RET_ERROR;
    }
    cnode->inputIndex.emplace_back(meta_graph->allTensors.size());
    meta_graph->allTensors.emplace_back(std::move(weight_sums_tensor));

    // add scalar to attr
    AddCustomAttr(&(primitive->attr), "b_batch", std::to_string(b_batch));
    AddCustomAttr(&(primitive->attr), "deep", std::to_string(matmul_param->deep_));
    AddCustomAttr(&(primitive->attr), "col", std::to_string(matmul_param->col_));
    AddCustomAttr(&(primitive->attr), "col_align", std::to_string(matmul_param->col_align_));
    AddCustomAttr(&(primitive->attr), "deep_align", std::to_string(matmul_param->deep_align_));

    // add cpu option
    std::string cpu_option_str = cpu_option;
    AddCustomAttr(&(primitive->attr), "cpu_option", std::move(cpu_option_str));

    cnode->primitive->value.value = primitive;
  }
  return RET_OK;
}

int ConverterPackedNode(schema::MetaGraphT *meta_graph, const std::string &cpu_option) {
  for (auto &dst_node : meta_graph->nodes) {
    if (dst_node->primitive == nullptr || dst_node->primitive->value.type != schema::PrimitiveType_MatMulFusion) {
      continue;
    }
    MS_CHECK_TRUE_MSG(dst_node->inputIndex.size() >= kInputSize1, RET_ERROR, "inputs size is wrong.");
    auto a_index = dst_node->inputIndex[FIRST_INPUT];
    MS_CHECK_TRUE_MSG(meta_graph->allTensors.size() > a_index, RET_ERROR, "allTensors size is wrong.");
    auto &a_input = meta_graph->allTensors.at(a_index);
    CHECK_NULL_RETURN(a_input);

    auto b_index = dst_node->inputIndex[SECOND_INPUT];
    MS_CHECK_TRUE_MSG(meta_graph->allTensors.size() > b_index, RET_ERROR, "allTensors size is wrong.");
    auto &b_input = meta_graph->allTensors.at(b_index);
    CHECK_NULL_RETURN(b_input);

    if (a_input->dataType != b_input->dataType) {
      MS_LOG(ERROR) << "inputs dataType is not same." << a_input->dataType << " " << b_input->dataType;
      return RET_ERROR;
    }

    if (b_input->data.empty()) {
      continue;
    }
    auto ret = ReplaceMatMulFusionToCustom(meta_graph, dst_node, b_input, cpu_option);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "ReplaceMatmulToCustom error.";
      return ret;
    }
  }

  return RET_OK;
}
}  // namespace lite
}  // namespace mindspore
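For orientation: the Custom node emitted here is exactly what the runtime PackedNodePass::Run consumes at load time. Its type is "MatmulFusionPacked"; its string attrs are activation_type, transpose_a, transpose_b, b_batch, deep, col, col_align, deep_align, and cpu_option; the second input tensor holds the pre-packed weight; and a trailing "<node name>_weight_sums" int32 input of b_batch * col_align elements is appended. The cpu_option value is the concatenation architecture + instruction built in converter.cc ("ARM64" + "SIMD_DOT"), which matches the kArm64SimdDot constant ("ARM64SIMD_DOT") checked by the runtime pass.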
tools/converter/converter_packed_node.h (new file)
@@ -0,0 +1,29 @@
/**
 * Copyright 2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_CONVERT_PACKED_NODE_H
#define MINDSPORE_LITE_TOOLS_CONVERTER_CONVERT_PACKED_NODE_H

#include <string>
#include "schema/inner/model_generated.h"

namespace mindspore {
namespace lite {
int ConverterPackedNode(schema::MetaGraphT *meta_graph, const std::string &cpu_option);
}  // namespace lite
}  // namespace mindspore

#endif  // MINDSPORE_LITE_TOOLS_CONVERTER_CONVERT_PACKED_NODE_H
tools/converter/cxx_api/converter_para.h
@@ -35,6 +35,11 @@ struct ParallelSplitConfig {
   std::vector<std::string> parallel_devices_;
 };
 
+struct CpuOptionCfg {
+  std::string architecture;
+  std::string instruction;
+};
+
 struct ConverterPara {
   converter::FmkType fmk_type;
   std::string model_file;
@@ -73,6 +78,7 @@ struct ConverterPara {
   lite::micro::MicroParam microParam;
   ParallelSplitConfig parallel_split_config;
   std::string device;
+  CpuOptionCfg cpuOptionCfgParam;
 };
 }  // namespace mindspore
 #endif  // MINDSPORE_LITE_TOOLS_CONVERTER_CXX_API_CONVERTER_PARA_H_
tools/converter/offline_packing_optimizer.h
@@ -33,7 +33,7 @@ STATUS MatmulPacking(const mindspore::CNodePtr &cnode_ptr, const FuncGraphPtr &f
                      const lite::InnerContext *ctx);
 mindspore::lite::InnerContext *InitInnerContextForAndroidArmCpu();
 
-enum class BackendType : uint {
+enum class BackendType : uint8_t {
   kUnknownBackend = 0,
   kAndroidArmCpuBackend,
 };