!11186 mixed quant support Reshape && fix memory leak

From: @xutianchun
Reviewed-by: 
Signed-off-by:
mindspore-ci-bot 2021-01-18 19:09:28 +08:00 committed by Gitee
commit fa3638ad6b
10 changed files with 261 additions and 115 deletions

View File

@@ -8,20 +8,21 @@ file(GLOB KERNEL_SRC
)
list(REMOVE_ITEM KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/int8/opt_op_handler.cc)
if (SUPPORT_TRAIN)
file (GLOB TRAIN_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/fp32_grad/*.cc)
if(SUPPORT_TRAIN)
file(GLOB TRAIN_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/fp32_grad/*.cc)
set(KERNEL_SRC ${KERNEL_SRC} ${TRAIN_KERNEL_SRC})
endif()
add_library(cpu_kernel_mid OBJECT ${KERNEL_SRC})
add_dependencies(cpu_kernel_mid fbs_src)
if (PLATFORM_ARM64)
if (ENABLE_FP16)
if(PLATFORM_ARM64)
if(ENABLE_FP16)
file(GLOB FP16_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/fp16/*.cc)
add_library(cpu_fp16_kernel_mid OBJECT ${FP16_KERNEL_SRC})
endif ()
add_dependencies(cpu_fp16_kernel_mid fbs_src)
endif()
file(GLOB OPT_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/int8/opt_op_handler.cc)
add_library(cpu_opt_kernel_mid OBJECT ${OPT_KERNEL_SRC})
endif ()
add_dependencies(cpu_kernel_mid fbs_src)
endif()

View File

@@ -1,3 +1,3 @@
retinaface_732_1280_iod.mindir
mobilefacenet_iod.mindir
effnet_iod.mindir
#effnet_iod.mindir

View File

@@ -540,9 +540,9 @@ function Run_x86() {
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_log_file}"
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.weightquant.ms.out >> "${run_x86_log_file}"
if [ $? = 0 ]; then
run_result='x86: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
run_result='x86: '${model_name}'[weight quant] pass'; echo ${run_result} >> ${run_benchmark_result_file}
else
run_result='x86: '${model_name}' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
run_result='x86: '${model_name}'[weight quant] failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
fi
done < ${models_mindspore_weightquant_config}
@@ -806,9 +806,9 @@ function Run_x86_sse() {
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_sse_log_file}"
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.weightquant.ms.out >> "${run_x86_sse_log_file}"
if [ $? = 0 ]; then
run_result='x86_sse: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
run_result='x86_sse: '${model_name}'[weight quant] pass'; echo ${run_result} >> ${run_benchmark_result_file}
else
run_result='x86_sse: '${model_name}' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
run_result='x86_sse: '${model_name}'[weight quant] failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
fi
done < ${models_mindspore_weightquant_config}
@@ -1072,9 +1072,9 @@ function Run_x86_avx() {
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_avx_log_file}"
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.weightquant.ms.out >> "${run_x86_avx_log_file}"
if [ $? = 0 ]; then
run_result='x86_avx: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
run_result='x86_avx: '${model_name}'[weight quant] pass'; echo ${run_result} >> ${run_benchmark_result_file}
else
run_result='x86_avx: '${model_name}' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
run_result='x86_avx: '${model_name}'[weight quant] failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
fi
done < ${models_mindspore_weightquant_config}
@@ -1624,9 +1624,9 @@ function Run_arm64() {
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'_weightquant.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.weightquant.ms.out --loopCount=1' >> adb_run_cmd.txt
adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}"
if [ $? = 0 ]; then
run_result='arm64: '${model_name}'_train pass'; echo ${run_result} >> ${run_benchmark_result_file}
run_result='arm64: '${model_name}'[weightQuant] pass'; echo ${run_result} >> ${run_benchmark_result_file}
else
run_result='arm64: '${model_name}'_train failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
run_result='arm64: '${model_name}'[weightQuant] failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
fi
done < ${models_mindspore_weightquant_config}

View File

@@ -141,11 +141,6 @@ int GraphDefTransform::Transform(const converter::Flags &ctx) {
// init old node indices
auto old_nodes = GetGraphNodes();
Optimizer formatTransOptimizer;
auto formatTransPass = new (std::nothrow) FormatTransPass();
if (formatTransPass == nullptr) {
MS_LOG(ERROR) << "new formatTransPass failed";
return RET_MEMORY_FAILED;
}
formatTransOptimizer.AddPass(new (std::nothrow) FormatTransFusionPass());
formatTransOptimizer.AddPass(new (std::nothrow) IsolatedNodeRemovePass());
formatTransOptimizer.AddPass(new (std::nothrow) TransOpRemovePass());
@@ -164,11 +159,6 @@ int GraphDefTransform::Transform(const converter::Flags &ctx) {
// init old node indices
auto old_nodes = GetGraphNodes();
Optimizer formatTransOptimizer;
auto formatTransPass = new (std::nothrow) FormatTransPass();
if (formatTransPass == nullptr) {
MS_LOG(ERROR) << "new formatTransPass failed";
return RET_MEMORY_FAILED;
}
if (!ctx.trainModel && ctx.fmk != converter::FmkType_ONNX) {
formatTransOptimizer.AddPass(new (std::nothrow) GlobalFormatTransformPass());
formatTransOptimizer.AddPass(new (std::nothrow) IsolatedNodeRemovePass());

View File

@@ -418,6 +418,13 @@ PostTrainingQuantizer::PostTrainingQuantizer(FuncGraphPtr graph, string path, in
}
}
PostTrainingQuantizer::~PostTrainingQuantizer() {
delete fp32_session_;
delete fp32_model_;
delete int8_session_;
delete int8_model_;
}
STATUS PostTrainingQuantizer::DoQuantInput(double scale, int32_t zeropoint, struct MaxMin *max_min,
const std::shared_ptr<PrimitiveC> &lite_primitive) const {
MS_ASSERT(max_min != nullptr);
@@ -1435,8 +1442,10 @@ STATUS PostTrainingQuantizer::DoQuantize(FuncGraphPtr func_graph) {
// anf -- fb
flags.quantType = schema::QuantType_QUANT_NONE;
MS_LOG(INFO) << "start create session";
fp32_session_ = CreateSessionByFuncGraph(func_graph, flags, calibrator_->GetThreadNum());
if (fp32_session_ == nullptr) {
auto sm = CreateSessionByFuncGraph(func_graph, flags, calibrator_->GetThreadNum());
fp32_session_ = sm.session;
fp32_model_ = sm.model;
if (fp32_session_ == nullptr || fp32_model_ == nullptr) {
MS_LOG(ERROR) << "create session failed!";
return RET_ERROR;
}
@@ -1481,8 +1490,10 @@ STATUS PostTrainingQuantizer::DoQuantize(FuncGraphPtr func_graph) {
// init int8 session
MS_LOG(INFO) << "create quant session";
flags.quantType = schema::QuantType_PostTraining;
int8_session_ = CreateSessionByFuncGraph(func_graph, flags, calibrator_->GetThreadNum());
if (int8_session_ == nullptr) {
auto int8_sm = CreateSessionByFuncGraph(func_graph, flags, calibrator_->GetThreadNum());
int8_session_ = int8_sm.session;
int8_model_ = int8_sm.model;
if (int8_session_ == nullptr || int8_model_ == nullptr) {
MS_LOG(ERROR) << "create session failed!";
return RET_ERROR;
}

View File

@@ -46,7 +46,7 @@ class PostTrainingQuantizer : public Quantizer {
public:
PostTrainingQuantizer(FuncGraphPtr graph, std::string path, int bit_num, TypeId target_type = kNumberTypeInt8,
bool per_channel = true);
~PostTrainingQuantizer() = default;
~PostTrainingQuantizer();
STATUS DoQuantize(FuncGraphPtr func_graph) override;
@@ -64,7 +64,9 @@ class PostTrainingQuantizer : public Quantizer {
std::unique_ptr<Calibrator> calibrator_;
session::LiteSession *fp32_session_{nullptr};
Model *fp32_model_{nullptr};
session::LiteSession *int8_session_{nullptr};
Model *int8_model_{nullptr};
std::map<std::string, std::vector<float>> fp32_op_input_map; // concurrency
std::map<std::string, std::vector<float>> fp32_op_output_ch_mean_map; // concurrency

View File

@@ -134,14 +134,14 @@ bool QuantStrategy::CanMulOpQuantized(const CNodePtr &node) const {
}
if (node->size() < 3) {
MS_LOG(INFO) << "input size less!";
MS_LOG(INFO) << node->fullname_with_scope() << " input size less!";
return false;
}
auto inputNode1 = node->input(1);
auto inputNode2 = node->input(2);
if (inputNode1 == nullptr || inputNode2 == nullptr) {
MS_LOG(INFO) << "mul input is nullptr!";
MS_LOG(INFO) << node->fullname_with_scope() << " mul input is nullptr!";
return false;
}
@@ -153,7 +153,7 @@ bool QuantStrategy::CanMulOpQuantized(const CNodePtr &node) const {
}
if (paramNode == nullptr) {
MS_LOG(INFO) << "invalid paramNode!";
MS_LOG(INFO) << node->fullname_with_scope() << " invalid paramNode!";
return false;
}
@@ -480,6 +480,48 @@ schema::PrimitiveType NodePrimitiveType(const CNodePtr &cnode) {
return (schema::PrimitiveType)primitive_c->Type();
}
std::vector<int> DataToVector(const string &str) {
std::vector<int> result;
auto raw_datas = str;
auto ind = raw_datas.find(',');
while (ind != std::string::npos) {
auto data = raw_datas.substr(0, ind);
Trim(&data);
result.push_back(std::stoul(data));
raw_datas = raw_datas.substr(ind + 1);
Trim(&raw_datas);
ind = raw_datas.find(',');
}
if (!raw_datas.empty()) {
result.push_back(std::stoul(raw_datas));
}
if (result.empty()) {
MS_LOG(ERROR) << "result is empty";
}
return result;
}
std::vector<std::vector<int>> DataToVectors(const string &str) {
std::vector<std::vector<int>> result;
auto raw_datas = str;
auto ind = raw_datas.find(';');
while (ind != std::string::npos) {
auto data = raw_datas.substr(0, ind);
Trim(&data);
result.push_back(DataToVector(data));
raw_datas = raw_datas.substr(ind + 1);
Trim(&raw_datas);
ind = raw_datas.find(';');
}
if (!raw_datas.empty()) {
result.push_back(DataToVector(raw_datas));
}
if (result.empty()) {
MS_LOG(ERROR) << "result is empty";
}
return result;
}
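For illustration (not part of the diff), a hypothetical use of the two helpers above, assuming Trim strips surrounding whitespace: commas separate dimensions, semicolons separate tensors.
// example values only
auto shape = DataToVector("1, 224, 224, 3");       // -> {1, 224, 224, 3}
auto shapes = DataToVectors("1,224,224,3; 1,16");  // -> {{1, 224, 224, 3}, {1, 16}}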
STATUS ParseConfigFile(std::string config_file, PostQuantConfig *post_quant_config) {
if (post_quant_config == nullptr) {
MS_LOG(ERROR) << "post_quant_config is null.";
@@ -559,6 +601,20 @@ STATUS ParseConfigFile(std::string config_file, PostQuantConfig *post_quant_conf
}
} else if (key == "mean_error_threshold") {
post_quant_config->mean_error_threshold = std::stof(value);
} else if (key == "input_shapes") {
auto &raw_shape = value;
auto ind = raw_shape.find('/');
while (ind != std::string::npos) {
auto shape = raw_shape.substr(0, ind);
Trim(&shape);
post_quant_config->input_shapes.push_back(DataToVectors(shape));
raw_shape = raw_shape.substr(ind + 1);
Trim(&raw_shape);
ind = raw_shape.find('/');
}
if (!raw_shape.empty()) {
post_quant_config->input_shapes.push_back(DataToVectors(raw_shape));
}
} else {
MS_LOG(WARNING) << "unsupported parameter: " << key;
}
@@ -578,12 +634,12 @@ STATUS ParseConfigFile(std::string config_file, PostQuantConfig *post_quant_conf
return RET_OK;
}
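Combining the three separators the parser above recognizes, a hypothetical input_shapes entry for two calibration rounds of a two-input model could read:
input_shapes=1,224,224,3;1,16/1,112,112,3;1,16
Each '/'-separated group is one calibration round, each ';'-separated item one input tensor, and each ','-separated number one dimension.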
session::LiteSession *CreateSessionByFuncGraph(const FuncGraphPtr &func_graph, const converter::Flags &flags,
int thread_num) {
SessionModel CreateSessionByFuncGraph(const FuncGraphPtr &func_graph, const converter::Flags &flags, int thread_num) {
SessionModel sm;
auto meta_graph = Export(func_graph, true, true);
if (meta_graph == nullptr) {
MS_LOG(ERROR) << "Export to meta_graph failed";
return nullptr;
return sm;
}
// transform
@@ -592,7 +648,7 @@ session::LiteSession *CreateSessionByFuncGraph(const FuncGraphPtr &func_graph, c
auto status = fb_transform.Transform(flags);
if (status != RET_OK) {
MS_LOG(ERROR) << "FBTransform model failed";
return nullptr;
return sm;
}
meta_graph->version = Version();
@@ -604,12 +660,12 @@ session::LiteSession *CreateSessionByFuncGraph(const FuncGraphPtr &func_graph, c
auto *content = reinterpret_cast<const char *>(builder.GetBufferPointer());
if (content == nullptr) {
MS_LOG(ERROR) << "GetBufferPointer return null";
return nullptr;
return sm;
}
auto model = lite::Model::Import(content, size);
if (model == nullptr) {
MS_LOG(ERROR) << "Import model failed";
return nullptr;
return sm;
}
Context ctx;
@@ -618,16 +674,19 @@ session::LiteSession *CreateSessionByFuncGraph(const FuncGraphPtr &func_graph, c
auto session = session::LiteSession::CreateSession(&ctx);
if (session == nullptr) {
MS_LOG(ERROR) << "create session failed.";
return nullptr;
return sm;
}
status = session->CompileGraph(model);
if (status != RET_OK) {
MS_LOG(ERROR) << "CompileGraph error";
return nullptr;
return sm;
}
model->Free();
return session;
delete meta_graph;
sm.session = session;
sm.model = model;
return sm;
}
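A caller-side sketch of the new contract (assumed from this diff, not part of it): on any failure an empty SessionModel comes back with both fields nullptr; on success the caller owns both objects, because deleting the session does not free the model it compiled.
auto sm = CreateSessionByFuncGraph(func_graph, flags, thread_num);
if (sm.session == nullptr || sm.model == nullptr) {
  return RET_ERROR;  // creation or compilation failed
}
// ... run inference ...
delete sm.session;  // the session does not delete the model,
delete sm.model;    // so both must be released, as ~PostTrainingQuantizer now does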
STATUS CollectCalibInputs(const std::vector<std::string> &input_dirs, size_t count_limited,
@@ -805,4 +864,21 @@ void GetLiteParameter(const AnfNodePtr &node, ParameterPtr *param_node, ParamVal
return;
}
}
STATUS UpdateTensorDataAndSize(ParamValueLitePtr weight, void *quant_datas, int new_size) {
MS_ASSERT(weight != nullptr);
MS_ASSERT(new_size > 0);
delete[] reinterpret_cast<char *>(weight->tensor_addr());
char *new_tensor_data = new (std::nothrow) char[new_size];
if (new_tensor_data == nullptr) {
MS_LOG(ERROR) << "new data error";
return RET_ERROR;
}
memcpy(new_tensor_data, quant_datas, new_size);
weight->set_tensor_size(new_size);
weight->set_tensor_addr(new_tensor_data);
return RET_OK;
}
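A design note, offered as a reading of this diff rather than a statement of intent: because UpdateTensorDataAndSize always swaps in a freshly allocated buffer of the requested size, one helper covers both directions, shrinking a weight to its quantized bytes and restoring the saved fp32 bytes when a mixed-bit attempt is rejected. A sketch of the restore call, mirroring the usage later in this diff:
// origin_data holds a saved fp32 copy of the weight (elem_count floats)
auto status = UpdateTensorDataAndSize(param_value, origin_data, sizeof(float) * elem_count);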
} // namespace mindspore::lite::quant

View File

@@ -57,9 +57,15 @@ struct PostQuantConfig {
bool bias_correction{false};
bool mixed{false};
float mean_error_threshold{0.04};
std::vector<std::vector<std::vector<int>>> input_shapes; // different input
bool inited{false};
};
struct SessionModel {
session::LiteSession *session{nullptr};
Model *model{nullptr};
};
/**
* 1. when op's weight size > mWeightSize just skip
* 2. only do conv/deconv/convdepthwise/deconvdepthwise/mul/matmul/batchmatmul quantization
@@ -97,6 +103,8 @@ std::pair<float, float> OutlierMethod(std::vector<float> min_datas, std::vector<
std::vector<int8_t> KMeans(float *data, size_t elem_count, size_t k, size_t epochs, schema::QuantParamT *quantParam);
STATUS UpdateTensorDataAndSize(ParamValueLitePtr weight, void *quant_datas, int new_size);
template <typename T>
T QuantizeData(const float originData, const schema::QuantParamT *quantParam) {
MS_ASSERT(quantParam != nullptr);
@@ -148,27 +156,17 @@ T QuantizeData(float originData, const schema::QuantParamT &quantParam, int quan
return static_cast<T>(quant_data);
}();
}
template <typename T>
STATUS QuantFilter(const ParamValueLitePtr &weight, const std::shared_ptr<PrimitiveC> &primitive_c, QuantType quantType,
int quant_max, int quant_min, size_t bitNum, bool per_channel, bool k_means = false) {
int quant_max, int quant_min, size_t bitNum, bool per_channel, int index = 1, bool k_means = false) {
MS_ASSERT(weight != nullptr);
MS_ASSERT(primitive_c != nullptr);
auto dims = weight->tensor_shape();
auto op_type = (schema::PrimitiveType)primitive_c->Type();
if (per_channel) {
if (dims.size() != 4 && dims.size() != 2 && op_type != schema::PrimitiveType_MatMul) {
MS_LOG(INFO) << "weight dims size: " << dims.size() << " switch to per-layer quant mode.";
if (dims.size() <= 1) {
MS_LOG(WARNING) << "dims is " << dims.size() << " can not per_channel";
per_channel = false;
} else {
if (dims.size() == 2 && op_type != schema::PrimitiveType_FullConnection) {
MS_LOG(INFO) << "weight dims size is 2 but op_type is not FullConnection, switch to per-layer quant mode.";
per_channel = false;
}
uint32_t channels = dims[0];
if (channels == 0) {
MS_LOG(ERROR) << "channels is 0";
return RET_ERROR;
}
}
}
@@ -261,12 +259,11 @@ STATUS QuantFilter(const ParamValueLitePtr &weight, const std::shared_ptr<Primit
}
quant_params.emplace_back(quant_param);
}
auto ret = memcpy_s(raw_datas, weight->tensor_size(), quant_datas.data(), elem_count * sizeof(T));
if (ret != EOK) {
MS_LOG(ERROR) << "memcpy error: " << ret;
auto status = UpdateTensorDataAndSize(weight, quant_datas.data(), quant_datas.size() * sizeof(T));
if (status != RET_OK) {
MS_LOG(ERROR) << "UpdateTensorDataAndSize error";
return RET_ERROR;
}
weight->set_tensor_size(elem_count * sizeof(T));
} else {
// per layer
float min = FLT_MAX;
@@ -294,12 +291,11 @@ STATUS QuantFilter(const ParamValueLitePtr &weight, const std::shared_ptr<Primit
quant_datas[i] = quant_data;
}
}
auto ret = memcpy_s(raw_datas, weight->tensor_size(), quant_datas.data(), elem_count * sizeof(T));
if (ret != EOK) {
MS_LOG(ERROR) << "memcpy error: " << ret;
auto status = UpdateTensorDataAndSize(weight, quant_datas.data(), quant_datas.size() * sizeof(T));
if (status != RET_OK) {
MS_LOG(ERROR) << "UpdateTensorDataAndSize error";
return RET_ERROR;
}
weight->set_tensor_size(elem_count * sizeof(T));
}
// do bit pack
@@ -311,21 +307,19 @@ STATUS QuantFilter(const ParamValueLitePtr &weight, const std::shared_ptr<Primit
if (bitNum > 0 && bitNum < 8) {
std::vector<uint8_t> pack_data{};
BitPack::BitPacking<T, uint8_t>(bitNum, data, &pack_data);
auto ret = memcpy_s(raw_datas, weight->tensor_size(), pack_data.data(), pack_data.size() * sizeof(uint8_t));
if (ret != EOK) {
MS_LOG(ERROR) << "PostBitPack memcpy_s qDatas_packed failed";
auto status = UpdateTensorDataAndSize(weight, pack_data.data(), pack_data.size() * sizeof(uint8_t));
if (status != RET_OK) {
MS_LOG(ERROR) << "UpdateTensorDataAndSize error";
return RET_ERROR;
}
weight->set_tensor_size(pack_data.size() * sizeof(uint8_t));
} else if (bitNum > 8 && bitNum < 16) {
std::vector<uint16_t> pack_data{};
BitPack::BitPacking<T, uint16_t>(bitNum, data, &pack_data);
auto ret = memcpy_s(raw_datas, weight->tensor_size(), pack_data.data(), pack_data.size() * sizeof(uint16_t));
if (ret != EOK) {
MS_LOG(ERROR) << "PostBitPack memcpy_s qDatas_packed failed";
auto status = UpdateTensorDataAndSize(weight, pack_data.data(), pack_data.size() * sizeof(uint16_t));
if (status != RET_OK) {
MS_LOG(ERROR) << "UpdateTensorDataAndSize error";
return RET_ERROR;
}
weight->set_tensor_size(pack_data.size() * sizeof(uint16_t));
}
}
@@ -336,7 +330,7 @@ STATUS QuantFilter(const ParamValueLitePtr &weight, const std::shared_ptr<Primit
if (quantType == QuantType_PostTraining) {
primitive_c->AddInputQuantParam(quant_params);
} else {
primitive_c->set_input_quant_param(WEIGHT_INDEX, quant_params);
primitive_c->set_input_quant_param(index, quant_params);
}
return RET_OK;
}
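For reference, hypothetical calls showing the new index parameter, which selects the input quant-param slot to write and defaults to 1 (the weight input of conv/matmul):
// conv/matmul: weight is input 1, the default
QuantFilter<int8_t>(weight, primitive_c, QuantType_WeightQuant, quant_max, quant_min, bit_num, true);
// LSTM: the weight sits at input 2, as DoLstmQuntize passes later in this diff
QuantFilter<int8_t>(weight, primitive_c, QuantType_WeightQuant, quant_max, quant_min, bit_num, false, 2);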
@@ -347,8 +341,7 @@ schema::PrimitiveType NodePrimitiveType(const CNodePtr &cnode);
STATUS ParseConfigFile(std::string config_file, PostQuantConfig *post_quant_config);
session::LiteSession *CreateSessionByFuncGraph(const FuncGraphPtr &func_graph, const converter::Flags &flags,
int thread_num);
SessionModel CreateSessionByFuncGraph(const FuncGraphPtr &func_graph, const converter::Flags &flags, int thread_num);
STATUS CollectCalibInputs(const std::vector<std::string> &input_dirs, size_t count_limited,
std::vector<std::vector<std::string>> *inputs);
@@ -359,6 +352,5 @@ STATUS CopyInputDataToTensor(size_t input_index, size_t image_index,
FuncGraphPtr CopyFuncGraph(const FuncGraphPtr &);
void GetLiteParameter(const AnfNodePtr &node, ParameterPtr *param_node, ParamValueLitePtr *param_value);
} // namespace mindspore::lite::quant
#endif

View File

@@ -84,7 +84,13 @@ WeightQuantizer::WeightQuantizer(FuncGraphPtr graph, const std::string &config_f
}
}
WeightQuantizer::~WeightQuantizer() { delete fp32_session_; }
WeightQuantizer::~WeightQuantizer() {
for (const auto &fp32_output_tensor : fp32_output_tensors_) {
for (const auto &kv : fp32_output_tensor) {
delete kv.second;
}
}
}
STATUS WeightQuantizer::SetAbstract(ParamValueLitePtr param_value, ParameterPtr param_node,
std::shared_ptr<PrimitiveC> primitive_c) {
@@ -278,11 +284,11 @@ STATUS WeightQuantizer::DoLstmQuntize(CNodePtr cnode) {
}
auto status = RET_ERROR;
if (type_id_ == kNumberTypeInt8) {
status =
QuantFilter<int8_t>(param_value, primitive_c, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, false);
status = QuantFilter<int8_t>(param_value, primitive_c, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_,
false, 2);
} else if (type_id_ == kNumberTypeInt16) {
status =
QuantFilter<int16_t>(param_value, primitive_c, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, false);
status = QuantFilter<int16_t>(param_value, primitive_c, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_,
false, 2);
}
if (status != RET_OK) {
MS_LOG(ERROR) << "QuantFilter failed : " << status;
@@ -438,15 +444,73 @@ float CompareOutputData(const std::unordered_map<std::string, mindspore::tensor:
return total_mean_error / tensor_cnt;
}
STATUS WeightQuantizer::DoMiexedQuant(FuncGraphPtr func_graph) {
STATUS WeightQuantizer::RunFp32Graph(FuncGraphPtr func_graph) {
auto image_cnt = images_.at(0).size();
if (!config_param_.input_shapes.empty()) {
if (config_param_.input_shapes.size() != image_cnt) {
MS_LOG(ERROR) << "input_shapes size: " << config_param_.input_shapes.size() << " image_cnt: " << image_cnt;
return RET_ERROR;
}
}
// 0.1 Create Fp32 Session
flags.quantType = schema::QuantType_QUANT_NONE;
fp32_session_ = CreateSessionByFuncGraph(func_graph, flags, config_param_.thread_num);
if (fp32_session_ == nullptr) {
auto fp32_sm = CreateSessionByFuncGraph(func_graph, flags, config_param_.thread_num);
auto fp32_session = fp32_sm.session;
auto fp32_model = fp32_sm.model;
if (fp32_session == nullptr || fp32_model == nullptr) {
MS_LOG(ERROR) << "CreateSessoin fail";
delete fp32_model;
return RET_ERROR;
}
auto fp32_inputs = fp32_session_->GetInputs();
auto fp32_inputs = fp32_session->GetInputs();
fp32_output_tensors_.resize(image_cnt);
// 0.3 save fp32 output
for (size_t i = 0; i < image_cnt; i++) {
if (!config_param_.input_shapes.empty()) {
auto status = fp32_session->Resize(fp32_inputs, {config_param_.input_shapes[i]});
if (status != RET_OK) {
MS_LOG(ERROR) << "session Resize fail";
delete fp32_sm.session;
delete fp32_sm.model;
return RET_ERROR;
}
}
for (size_t input_index = 0; input_index < fp32_inputs.size(); input_index++) {
auto status = CopyInputDataToTensor(input_index, i, images_, fp32_inputs[input_index]);
if (status != RET_OK) {
MS_LOG(ERROR) << "generate input data from images failed!";
delete fp32_sm.session;
delete fp32_sm.model;
return RET_ERROR;
}
}
auto status = fp32_session->RunGraph();
if (status != RET_OK) {
MS_LOG(ERROR) << "RunGraph fail";
delete fp32_sm.session;
delete fp32_sm.model;
return RET_ERROR;
}
auto fp32_outputs = fp32_session->GetOutputs();
for (const auto &kv : fp32_outputs) {
auto *tensor = kv.second;
auto *lite_tensor = reinterpret_cast<lite::Tensor *>(tensor);
if (lite_tensor == nullptr) {
MS_LOG(ERROR) << "not lite tensor";
delete fp32_sm.session;
delete fp32_sm.model;
return RET_ERROR;
}
auto *new_tensor = Tensor::CopyTensor(*lite_tensor, true);
fp32_output_tensors_[i][kv.first] = new_tensor;
}
}
delete fp32_sm.session;
delete fp32_sm.model;
return RET_OK;
}
STATUS WeightQuantizer::DoMiexedQuant(FuncGraphPtr func_graph) {
// 0.2 Parse input calib files
auto status = CollectCalibInputs(config_param_.image_paths, config_param_.batch_count, &images_);
if (status != RET_OK) {
@@ -454,6 +518,12 @@ STATUS WeightQuantizer::DoMiexedQuant(FuncGraphPtr func_graph) {
return RET_ERROR;
}
MS_LOG(DEBUG) << "run fp32 model";
status = RunFp32Graph(func_graph);
if (status != RET_OK) {
return RET_ERROR;
}
auto cnodes = func_graph->GetOrderedCnodes();
for (auto &cnode : cnodes) {
auto op_type = NodePrimitiveType(cnode);
@@ -471,6 +541,13 @@ STATUS WeightQuantizer::DoMiexedQuant(FuncGraphPtr func_graph) {
}
}
}
auto image_cnt = images_.at(0).size();
if (!config_param_.input_shapes.empty()) {
if (config_param_.input_shapes.size() != image_cnt) {
MS_LOG(ERROR) << "input_shapes size: " << config_param_.input_shapes.size() << " image_cnt: " << image_cnt;
return RET_ERROR;
}
}
for (auto iter = cnodes.end(); iter != cnodes.begin();) {
auto cnode = *(--iter);
@@ -540,66 +617,58 @@ STATUS WeightQuantizer::DoMiexedQuant(FuncGraphPtr func_graph) {
// 2. evaluate the quant
// 2.1 create quant session, get input, output tensor
flags.quantType = schema::QuantType_WeightQuant;
auto quant_session =
std::unique_ptr<session::LiteSession>(CreateSessionByFuncGraph(func_graph, flags, config_param_.thread_num));
auto quant_sm = CreateSessionByFuncGraph(func_graph, flags, config_param_.thread_num);
auto quant_session = std::unique_ptr<session::LiteSession>(quant_sm.session);
if (quant_session == nullptr) {
MS_LOG(ERROR) << "create session error: " << status;
delete quant_sm.model;
return RET_ERROR;
}
auto quant_inputs = quant_session->GetInputs();
auto mean_error = 0.0f;
if (fp32_inputs.size() != images_.size()) {
MS_LOG(ERROR) << "model's input tensor cnt: " << fp32_inputs.size() << " != " << images_.size();
return RET_ERROR;
}
auto image_cnt = images_.at(0).size();
for (size_t i = 0; i < image_cnt; i++) {
// set multi-input data
for (size_t input_index = 0; input_index < fp32_inputs.size(); input_index++) {
status = CopyInputDataToTensor(input_index, i, images_, fp32_inputs[input_index]);
if (!config_param_.input_shapes.empty()) {
status = quant_session->Resize(quant_inputs, {config_param_.input_shapes[i]});
if (status != RET_OK) {
MS_LOG(ERROR) << "generate input data from images failed!";
MS_LOG(ERROR) << "session Resize fail";
delete quant_sm.model;
return RET_ERROR;
}
}
// set multi-input data
for (size_t input_index = 0; input_index < quant_inputs.size(); input_index++) {
status = CopyInputDataToTensor(input_index, i, images_, quant_inputs[input_index]);
if (status != RET_OK) {
MS_LOG(ERROR) << "generate input data from images failed!";
delete quant_sm.model;
return RET_ERROR;
}
}
std::future<STATUS> fp32_inference = std::async(
std::launch::async, [](session::LiteSession *fp32_session) -> STATUS { return fp32_session->RunGraph(); },
fp32_session_);
status = quant_session->RunGraph();
if (status != RET_OK) {
MS_LOG(ERROR) << "quant session run error";
return RET_ERROR;
}
status = fp32_inference.get();
if (status != RET_OK) {
MS_LOG(ERROR) << "fp32 session run error";
delete quant_sm.model;
return RET_ERROR;
}
// 3. compare between quant and fp32
auto fp32_outputs = fp32_session_->GetOutputs();
auto quant_outputs = quant_session->GetOutputs();
mean_error += CompareOutputData<float>(fp32_outputs, quant_outputs);
mean_error += CompareOutputData<float>(fp32_output_tensors_[i], quant_outputs);
} // end_for: calib data loop
delete quant_sm.model;
mean_error = mean_error / image_cnt;
if (mean_error <= config_param_.mean_error_threshold) {
MS_LOG(DEBUG) << "op: " << op_name << " got mixed bit: " << bit_num_t << " mean_error: " << mean_error;
opname_bit_[op_name] = bit_num_t;
break;
} else if (bit_num_t != 8) {
MS_LOG(DEBUG) << "op: " << op_name << " intermediate bit: " << bit_num_t << " mean_error: " << mean_error
<< " [recover]";
// recover
param_value->set_tensor_size(sizeof(float) * elem_count);
ret = memcpy_s(raw_data, param_value->tensor_size(), origin_data, sizeof(float) * elem_count);
if (ret != EOK) {
MS_LOG(ERROR) << "memcpy fail: "
<< " src size: " << sizeof(float) * elem_count << " dst size: " << param_value->tensor_size();
status = UpdateTensorDataAndSize(param_value, origin_data, sizeof(float) * elem_count);
if (status != RET_OK) {
MS_LOG(ERROR) << "UpdateTensorDataAndSize fail";
return RET_ERROR;
}
} else {
@@ -610,6 +679,9 @@ STATUS WeightQuantizer::DoMiexedQuant(FuncGraphPtr func_graph) {
free(origin_data);
} // if: conv and matmul
} // end loop: all cnode
for (const auto &kv : opname_bit_) {
MS_LOG(INFO) << "op: " << kv.first << " bit:" << kv.second;
}
return RET_OK;
}

View File

@@ -19,6 +19,7 @@
#include <future>
#include <memory>
#include <unordered_map>
#include <map>
#include <list>
#include <string>
@@ -59,11 +60,12 @@ class WeightQuantizer : public Quantizer {
std::string config_file_;
PostQuantConfig config_param_;
std::vector<std::vector<std::string>> images_; // multi_input, [[model_input_0], [model_input_1]...]
session::LiteSession *fp32_session_ = nullptr;
std::vector<std::unordered_map<std::string, mindspore::tensor::MSTensor *>> fp32_output_tensors_;
STATUS DoMiexedQuant(FuncGraphPtr);
STATUS SetAbstract(ParamValueLitePtr param_value, ParameterPtr param_node, std::shared_ptr<PrimitiveC> primitive_c);
STATUS DoFixedQuant(FuncGraphPtr);
STATUS RunFp32Graph(FuncGraphPtr);
};
} // namespace mindspore::lite::quant
#endif