forked from mindspore-Ecosystem/mindspore
!11186 mixed quant support Reshape && fix memory leak
From: @xutianchun Reviewed-by: Signed-off-by:
This commit is contained in:
commit
fa3638ad6b
|
@ -8,20 +8,21 @@ file(GLOB KERNEL_SRC
|
|||
)
|
||||
list(REMOVE_ITEM KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/int8/opt_op_handler.cc)
|
||||
|
||||
if (SUPPORT_TRAIN)
|
||||
file (GLOB TRAIN_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/fp32_grad/*.cc)
|
||||
if(SUPPORT_TRAIN)
|
||||
file(GLOB TRAIN_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/fp32_grad/*.cc)
|
||||
set(KERNEL_SRC ${KERNEL_SRC} ${TRAIN_KERNEL_SRC})
|
||||
endif()
|
||||
|
||||
add_library(cpu_kernel_mid OBJECT ${KERNEL_SRC})
|
||||
add_dependencies(cpu_kernel_mid fbs_src)
|
||||
|
||||
if (PLATFORM_ARM64)
|
||||
if (ENABLE_FP16)
|
||||
if(PLATFORM_ARM64)
|
||||
if(ENABLE_FP16)
|
||||
file(GLOB FP16_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/fp16/*.cc)
|
||||
add_library(cpu_fp16_kernel_mid OBJECT ${FP16_KERNEL_SRC})
|
||||
endif ()
|
||||
add_dependencies(cpu_fp16_kernel_mid fbs_src)
|
||||
endif()
|
||||
file(GLOB OPT_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/int8/opt_op_handler.cc)
|
||||
add_library(cpu_opt_kernel_mid OBJECT ${OPT_KERNEL_SRC})
|
||||
endif ()
|
||||
|
||||
add_dependencies(cpu_kernel_mid fbs_src)
|
||||
endif()
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
retinaface_732_1280_iod.mindir
|
||||
mobilefacenet_iod.mindir
|
||||
effnet_iod.mindir
|
||||
#effnet_iod.mindir
|
||||
|
|
|
@ -540,9 +540,9 @@ function Run_x86() {
|
|||
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_log_file}"
|
||||
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.weightquant.ms.out >> "${run_x86_log_file}"
|
||||
if [ $? = 0 ]; then
|
||||
run_result='x86: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
|
||||
run_result='x86: '${model_name}'[weight quant] pass'; echo ${run_result} >> ${run_benchmark_result_file}
|
||||
else
|
||||
run_result='x86: '${model_name}' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
|
||||
run_result='x86: '${model_name}'[weight quant] failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
|
||||
fi
|
||||
done < ${models_mindspore_weightquant_config}
|
||||
|
||||
|
@ -806,9 +806,9 @@ function Run_x86_sse() {
|
|||
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_sse_log_file}"
|
||||
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.weightquant.ms.out >> "${run_x86_sse_log_file}"
|
||||
if [ $? = 0 ]; then
|
||||
run_result='x86_sse: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
|
||||
run_result='x86_sse: '${model_name}'[weight quant] pass'; echo ${run_result} >> ${run_benchmark_result_file}
|
||||
else
|
||||
run_result='x86_sse: '${model_name}' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
|
||||
run_result='x86_sse: '${model_name}'[weight quant] failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
|
||||
fi
|
||||
done < ${models_mindspore_weightquant_config}
|
||||
|
||||
|
@ -1072,9 +1072,9 @@ function Run_x86_avx() {
|
|||
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_avx_log_file}"
|
||||
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.weightquant.ms.out >> "${run_x86_avx_log_file}"
|
||||
if [ $? = 0 ]; then
|
||||
run_result='x86_avx: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
|
||||
run_result='x86_avx: '${model_name}'[weight quant] pass'; echo ${run_result} >> ${run_benchmark_result_file}
|
||||
else
|
||||
run_result='x86_avx: '${model_name}' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
|
||||
run_result='x86_avx: '${model_name}'[weight quant] failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
|
||||
fi
|
||||
done < ${models_mindspore_weightquant_config}
|
||||
|
||||
|
@ -1624,9 +1624,9 @@ function Run_arm64() {
|
|||
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'_weightquant.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.weightquant.ms.out --loopCount=1' >> adb_run_cmd.txt
|
||||
adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}"
|
||||
if [ $? = 0 ]; then
|
||||
run_result='arm64: '${model_name}'_train pass'; echo ${run_result} >> ${run_benchmark_result_file}
|
||||
run_result='arm64: '${model_name}'[weightQuant] pass'; echo ${run_result} >> ${run_benchmark_result_file}
|
||||
else
|
||||
run_result='arm64: '${model_name}'_train failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
|
||||
run_result='arm64: '${model_name}'[weightQuant] failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
|
||||
fi
|
||||
done < ${models_mindspore_weightquant_config}
|
||||
|
||||
|
|
|
@ -141,11 +141,6 @@ int GraphDefTransform::Transform(const converter::Flags &ctx) {
|
|||
// init old node indices
|
||||
auto old_nodes = GetGraphNodes();
|
||||
Optimizer formatTransOptimizer;
|
||||
auto formatTransPass = new (std::nothrow) FormatTransPass();
|
||||
if (formatTransPass == nullptr) {
|
||||
MS_LOG(ERROR) << "new formatTransPass failed";
|
||||
return RET_MEMORY_FAILED;
|
||||
}
|
||||
formatTransOptimizer.AddPass(new (std::nothrow) FormatTransFusionPass());
|
||||
formatTransOptimizer.AddPass(new (std::nothrow) IsolatedNodeRemovePass());
|
||||
formatTransOptimizer.AddPass(new (std::nothrow) TransOpRemovePass());
|
||||
|
@ -164,11 +159,6 @@ int GraphDefTransform::Transform(const converter::Flags &ctx) {
|
|||
// init old node indices
|
||||
auto old_nodes = GetGraphNodes();
|
||||
Optimizer formatTransOptimizer;
|
||||
auto formatTransPass = new (std::nothrow) FormatTransPass();
|
||||
if (formatTransPass == nullptr) {
|
||||
MS_LOG(ERROR) << "new formatTransPass failed";
|
||||
return RET_MEMORY_FAILED;
|
||||
}
|
||||
if (!ctx.trainModel && ctx.fmk != converter::FmkType_ONNX) {
|
||||
formatTransOptimizer.AddPass(new (std::nothrow) GlobalFormatTransformPass());
|
||||
formatTransOptimizer.AddPass(new (std::nothrow) IsolatedNodeRemovePass());
|
||||
|
|
|
@ -418,6 +418,13 @@ PostTrainingQuantizer::PostTrainingQuantizer(FuncGraphPtr graph, string path, in
|
|||
}
|
||||
}
|
||||
|
||||
// Destructor: frees the sessions and models held in the raw-pointer members of this quantizer.
// The fp32 pair is released first, then the int8 pair; within each pair the session is deleted
// before its model. Deleting a member that is still nullptr is a no-op.
PostTrainingQuantizer::~PostTrainingQuantizer() {
  // fp32 (reference) inference objects
  delete fp32_session_;
  delete fp32_model_;
  // int8 (quantized) inference objects
  delete int8_session_;
  delete int8_model_;
}
|
||||
|
||||
STATUS PostTrainingQuantizer::DoQuantInput(double scale, int32_t zeropoint, struct MaxMin *max_min,
|
||||
const std::shared_ptr<PrimitiveC> &lite_primitive) const {
|
||||
MS_ASSERT(max_min != nullptr);
|
||||
|
@ -1435,8 +1442,10 @@ STATUS PostTrainingQuantizer::DoQuantize(FuncGraphPtr func_graph) {
|
|||
// anf -- fb
|
||||
flags.quantType = schema::QuantType_QUANT_NONE;
|
||||
MS_LOG(INFO) << "start create session";
|
||||
fp32_session_ = CreateSessionByFuncGraph(func_graph, flags, calibrator_->GetThreadNum());
|
||||
if (fp32_session_ == nullptr) {
|
||||
auto sm = CreateSessionByFuncGraph(func_graph, flags, calibrator_->GetThreadNum());
|
||||
fp32_session_ = sm.session;
|
||||
fp32_model_ = sm.model;
|
||||
if (fp32_session_ == nullptr || fp32_model_ == nullptr) {
|
||||
MS_LOG(ERROR) << "create session failed!";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
@ -1481,8 +1490,10 @@ STATUS PostTrainingQuantizer::DoQuantize(FuncGraphPtr func_graph) {
|
|||
// init int8 session
|
||||
MS_LOG(INFO) << "create quant session";
|
||||
flags.quantType = schema::QuantType_PostTraining;
|
||||
int8_session_ = CreateSessionByFuncGraph(func_graph, flags, calibrator_->GetThreadNum());
|
||||
if (int8_session_ == nullptr) {
|
||||
auto int8_sm = CreateSessionByFuncGraph(func_graph, flags, calibrator_->GetThreadNum());
|
||||
int8_session_ = int8_sm.session;
|
||||
int8_model_ = int8_sm.model;
|
||||
if (int8_session_ == nullptr || int8_model_ == nullptr) {
|
||||
MS_LOG(ERROR) << "create session failed!";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
|
|
@ -46,7 +46,7 @@ class PostTrainingQuantizer : public Quantizer {
|
|||
public:
|
||||
PostTrainingQuantizer(FuncGraphPtr graph, std::string path, int bit_num, TypeId target_type = kNumberTypeInt8,
|
||||
bool per_channel = true);
|
||||
~PostTrainingQuantizer() = default;
|
||||
~PostTrainingQuantizer();
|
||||
|
||||
STATUS DoQuantize(FuncGraphPtr func_graph) override;
|
||||
|
||||
|
@ -64,7 +64,9 @@ class PostTrainingQuantizer : public Quantizer {
|
|||
std::unique_ptr<Calibrator> calibrator_;
|
||||
|
||||
session::LiteSession *fp32_session_{nullptr};
|
||||
Model *fp32_model_{nullptr};
|
||||
session::LiteSession *int8_session_{nullptr};
|
||||
Model *int8_model_{nullptr};
|
||||
|
||||
std::map<std::string, std::vector<float>> fp32_op_input_map; // concurency
|
||||
std::map<std::string, std::vector<float>> fp32_op_output_ch_mean_map; // concurency
|
||||
|
|
|
@ -134,14 +134,14 @@ bool QuantStrategy::CanMulOpQuantized(const CNodePtr &node) const {
|
|||
}
|
||||
|
||||
if (node->size() < 3) {
|
||||
MS_LOG(INFO) << "input size less!";
|
||||
MS_LOG(INFO) << node->fullname_with_scope() << " input size less!";
|
||||
return false;
|
||||
}
|
||||
|
||||
auto inputNode1 = node->input(1);
|
||||
auto inputNode2 = node->input(2);
|
||||
if (inputNode1 == nullptr || inputNode2 == nullptr) {
|
||||
MS_LOG(INFO) << "mul input is nullptr!";
|
||||
MS_LOG(INFO) << node->fullname_with_scope() << " mul input is nullptr!";
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -153,7 +153,7 @@ bool QuantStrategy::CanMulOpQuantized(const CNodePtr &node) const {
|
|||
}
|
||||
|
||||
if (paramNode == nullptr) {
|
||||
MS_LOG(INFO) << "invalid paramNode!";
|
||||
MS_LOG(INFO) << node->fullname_with_scope() << " invalid paramNode!";
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -480,6 +480,48 @@ schema::PrimitiveType NodePrimitiveType(const CNodePtr &cnode) {
|
|||
return (schema::PrimitiveType)primitive_c->Type();
|
||||
}
|
||||
|
||||
std::vector<int> DataToVector(const string &str) {
|
||||
std::vector<int> result;
|
||||
auto raw_datas = str;
|
||||
auto ind = raw_datas.find(',');
|
||||
while (ind != std::string::npos) {
|
||||
auto data = raw_datas.substr(0, ind);
|
||||
Trim(&data);
|
||||
result.push_back(std::stoul(data));
|
||||
raw_datas = raw_datas.substr(ind + 1);
|
||||
Trim(&raw_datas);
|
||||
ind = raw_datas.find(',');
|
||||
}
|
||||
if (!raw_datas.empty()) {
|
||||
result.push_back(std::stoul(raw_datas));
|
||||
}
|
||||
if (result.empty()) {
|
||||
MS_LOG(ERROR) << "result is empty";
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
std::vector<std::vector<int>> DataToVectors(const string &str) {
|
||||
std::vector<std::vector<int>> result;
|
||||
auto raw_datas = str;
|
||||
auto ind = raw_datas.find(';');
|
||||
while (ind != std::string::npos) {
|
||||
auto data = raw_datas.substr(0, ind);
|
||||
Trim(&data);
|
||||
result.push_back(DataToVector(data));
|
||||
raw_datas = raw_datas.substr(ind + 1);
|
||||
Trim(&raw_datas);
|
||||
ind = raw_datas.find(';');
|
||||
}
|
||||
if (!raw_datas.empty()) {
|
||||
result.push_back(DataToVector(raw_datas));
|
||||
}
|
||||
if (result.empty()) {
|
||||
MS_LOG(ERROR) << "result is empty";
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
STATUS ParseConfigFile(std::string config_file, PostQuantConfig *post_quant_config) {
|
||||
if (post_quant_config == nullptr) {
|
||||
MS_LOG(ERROR) << "post_quant_config is null.";
|
||||
|
@ -559,6 +601,20 @@ STATUS ParseConfigFile(std::string config_file, PostQuantConfig *post_quant_conf
|
|||
}
|
||||
} else if (key == "mean_error_threshold") {
|
||||
post_quant_config->mean_error_threshold = std::stof(value);
|
||||
} else if (key == "input_shapes") {
|
||||
auto &raw_shape = value;
|
||||
auto ind = raw_shape.find('/');
|
||||
while (ind != std::string::npos) {
|
||||
auto shape = raw_shape.substr(0, ind);
|
||||
Trim(&shape);
|
||||
post_quant_config->input_shapes.push_back(DataToVectors(shape));
|
||||
raw_shape = raw_shape.substr(ind + 1);
|
||||
Trim(&raw_shape);
|
||||
ind = raw_shape.find('/');
|
||||
}
|
||||
if (!raw_shape.empty()) {
|
||||
post_quant_config->input_shapes.push_back(DataToVectors(raw_shape));
|
||||
}
|
||||
} else {
|
||||
MS_LOG(WARNING) << "unsupported parameter: " << key;
|
||||
}
|
||||
|
@ -578,12 +634,12 @@ STATUS ParseConfigFile(std::string config_file, PostQuantConfig *post_quant_conf
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
session::LiteSession *CreateSessionByFuncGraph(const FuncGraphPtr &func_graph, const converter::Flags &flags,
|
||||
int thread_num) {
|
||||
SessionModel CreateSessionByFuncGraph(const FuncGraphPtr &func_graph, const converter::Flags &flags, int thread_num) {
|
||||
SessionModel sm;
|
||||
auto meta_graph = Export(func_graph, true, true);
|
||||
if (meta_graph == nullptr) {
|
||||
MS_LOG(ERROR) << "Export to meta_graph failed";
|
||||
return nullptr;
|
||||
return sm;
|
||||
}
|
||||
|
||||
// transform
|
||||
|
@ -592,7 +648,7 @@ session::LiteSession *CreateSessionByFuncGraph(const FuncGraphPtr &func_graph, c
|
|||
auto status = fb_transform.Transform(flags);
|
||||
if (status != RET_OK) {
|
||||
MS_LOG(ERROR) << "FBTransform model failed";
|
||||
return nullptr;
|
||||
return sm;
|
||||
}
|
||||
meta_graph->version = Version();
|
||||
|
||||
|
@ -604,12 +660,12 @@ session::LiteSession *CreateSessionByFuncGraph(const FuncGraphPtr &func_graph, c
|
|||
auto *content = reinterpret_cast<const char *>(builder.GetBufferPointer());
|
||||
if (content == nullptr) {
|
||||
MS_LOG(ERROR) << "GetBufferPointer return null";
|
||||
return nullptr;
|
||||
return sm;
|
||||
}
|
||||
auto model = lite::Model::Import(content, size);
|
||||
if (model == nullptr) {
|
||||
MS_LOG(ERROR) << "Import model failed";
|
||||
return nullptr;
|
||||
return sm;
|
||||
}
|
||||
|
||||
Context ctx;
|
||||
|
@ -618,16 +674,19 @@ session::LiteSession *CreateSessionByFuncGraph(const FuncGraphPtr &func_graph, c
|
|||
auto session = session::LiteSession::CreateSession(&ctx);
|
||||
if (session == nullptr) {
|
||||
MS_LOG(ERROR) << "create session failed.";
|
||||
return nullptr;
|
||||
return sm;
|
||||
}
|
||||
|
||||
status = session->CompileGraph(model);
|
||||
if (status != RET_OK) {
|
||||
MS_LOG(ERROR) << "CompileGraph error";
|
||||
return nullptr;
|
||||
return sm;
|
||||
}
|
||||
model->Free();
|
||||
return session;
|
||||
delete meta_graph;
|
||||
sm.session = session;
|
||||
sm.model = model;
|
||||
return sm;
|
||||
}
|
||||
|
||||
STATUS CollectCalibInputs(const std::vector<std::string> &input_dirs, size_t count_limited,
|
||||
|
@ -805,4 +864,21 @@ void GetLiteParameter(const AnfNodePtr &node, ParameterPtr *param_node, ParamVal
|
|||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Replaces the data buffer of `weight` with a freshly allocated copy of `quant_datas`.
//
// Parameters:
//   weight      - parameter value whose tensor buffer and tensor size are replaced.
//   quant_datas - source bytes to copy; must be readable for `new_size` bytes.
//   new_size    - number of bytes to copy; becomes the new tensor size.
//
// Returns RET_OK on success. Returns RET_ERROR on invalid arguments or when the allocation
// fails; in that case `weight` is left untouched and still owns its previous buffer.
//
// The new buffer is allocated and filled BEFORE the old one is released. Releasing first would
// leave `weight` pointing at freed memory if the allocation failed, and would read freed memory
// if `quant_datas` happened to alias the current tensor buffer.
//
// NOTE(review): the old buffer is released with delete[] as char[], exactly as before — this
// assumes the tensor data was allocated with new char[]; confirm against the allocation sites.
STATUS UpdateTensorDataAndSize(ParamValueLitePtr weight, void *quant_datas, int new_size) {
  MS_ASSERT(weight != nullptr);
  MS_ASSERT(new_size > 0);
  // MS_ASSERT may be compiled out, so reject arguments that would corrupt memory at runtime too.
  if (weight == nullptr || new_size < 0 || (quant_datas == nullptr && new_size > 0)) {
    MS_LOG(ERROR) << "invalid argument, new_size: " << new_size;
    return RET_ERROR;
  }

  char *new_tensor_data = new (std::nothrow) char[new_size];
  if (new_tensor_data == nullptr) {
    MS_LOG(ERROR) << "new data error";
    return RET_ERROR;
  }
  if (new_size > 0) {
    memcpy(new_tensor_data, quant_datas, new_size);
  }

  // Only now is it safe to drop the previous buffer and publish the new one.
  delete[] reinterpret_cast<char *>(weight->tensor_addr());
  weight->set_tensor_size(new_size);
  weight->set_tensor_addr(new_tensor_data);
  return RET_OK;
}
|
||||
|
||||
} // namespace mindspore::lite::quant
|
||||
|
|
|
@ -57,9 +57,15 @@ struct PostQuantConfig {
|
|||
bool bias_correction{false};
|
||||
bool mixed{false};
|
||||
float mean_error_threshold{0.04};
|
||||
std::vector<std::vector<std::vector<int>>> input_shapes; // different input
|
||||
bool inited{false};
|
||||
};
|
||||
|
||||
struct SessionModel {
|
||||
session::LiteSession *session{nullptr};
|
||||
Model *model{nullptr};
|
||||
};
|
||||
|
||||
/**
|
||||
* 1. when op's weight size > mWeightSize just skip
|
||||
* 2. only do conv/deconv/convdepthwise/deconvdepthwise/mul/matmul/batchmatmul quantization
|
||||
|
@ -97,6 +103,8 @@ std::pair<float, float> OutlierMethod(std::vector<float> min_datas, std::vector<
|
|||
|
||||
std::vector<int8_t> KMeans(float *data, size_t elem_count, size_t k, size_t epochs, schema::QuantParamT *quantParam);
|
||||
|
||||
STATUS UpdateTensorDataAndSize(ParamValueLitePtr weight, void *quant_datas, int new_size);
|
||||
|
||||
template <typename T>
|
||||
T QuantizeData(const float originData, const schema::QuantParamT *quantParam) {
|
||||
MS_ASSERT(quantParam != nullptr);
|
||||
|
@ -148,27 +156,17 @@ T QuantizeData(float originData, const schema::QuantParamT &quantParam, int quan
|
|||
return static_cast<T>(quant_data);
|
||||
}();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
STATUS QuantFilter(const ParamValueLitePtr &weight, const std::shared_ptr<PrimitiveC> &primitive_c, QuantType quantType,
|
||||
int quant_max, int quant_min, size_t bitNum, bool per_channel, bool k_means = false) {
|
||||
int quant_max, int quant_min, size_t bitNum, bool per_channel, int index = 1, bool k_means = false) {
|
||||
MS_ASSERT(weight != nullptr);
|
||||
MS_ASSERT(primitive_c != nullptr);
|
||||
auto dims = weight->tensor_shape();
|
||||
auto op_type = (schema::PrimitiveType)primitive_c->Type();
|
||||
if (per_channel) {
|
||||
if (dims.size() != 4 && dims.size() != 2 && op_type != schema::PrimitiveType_MatMul) {
|
||||
MS_LOG(INFO) << "weight dims size: " << dims.size() << " switch to per-layer quant mode.";
|
||||
if (dims.size() <= 1) {
|
||||
MS_LOG(WARNING) << "dims is " << dims.size() << " can not per_channel";
|
||||
per_channel = false;
|
||||
} else {
|
||||
if (dims.size() == 2 && op_type != schema::PrimitiveType_FullConnection) {
|
||||
MS_LOG(INFO) << "weight dims size is 2 but op_type is not FullConnection, switch to per-layer quant mode.";
|
||||
per_channel = false;
|
||||
}
|
||||
uint32_t channels = dims[0];
|
||||
if (channels == 0) {
|
||||
MS_LOG(ERROR) << "channels is 0";
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -261,12 +259,11 @@ STATUS QuantFilter(const ParamValueLitePtr &weight, const std::shared_ptr<Primit
|
|||
}
|
||||
quant_params.emplace_back(quant_param);
|
||||
}
|
||||
auto ret = memcpy_s(raw_datas, weight->tensor_size(), quant_datas.data(), elem_count * sizeof(T));
|
||||
if (ret != EOK) {
|
||||
MS_LOG(ERROR) << "memcpy error: " << ret;
|
||||
auto status = UpdateTensorDataAndSize(weight, quant_datas.data(), quant_datas.size() * sizeof(T));
|
||||
if (status != RET_OK) {
|
||||
MS_LOG(ERROR) << "UpdateTensorDataAndSize error";
|
||||
return RET_ERROR;
|
||||
}
|
||||
weight->set_tensor_size(elem_count * sizeof(T));
|
||||
} else {
|
||||
// per layer
|
||||
float min = FLT_MAX;
|
||||
|
@ -294,12 +291,11 @@ STATUS QuantFilter(const ParamValueLitePtr &weight, const std::shared_ptr<Primit
|
|||
quant_datas[i] = quant_data;
|
||||
}
|
||||
}
|
||||
auto ret = memcpy_s(raw_datas, weight->tensor_size(), quant_datas.data(), elem_count * sizeof(T));
|
||||
if (ret != EOK) {
|
||||
MS_LOG(ERROR) << "memcpy error: " << ret;
|
||||
auto status = UpdateTensorDataAndSize(weight, quant_datas.data(), quant_datas.size() * sizeof(T));
|
||||
if (status != RET_OK) {
|
||||
MS_LOG(ERROR) << "UpdateTensorDataAndSize error";
|
||||
return RET_ERROR;
|
||||
}
|
||||
weight->set_tensor_size(elem_count * sizeof(T));
|
||||
}
|
||||
|
||||
// do bit pack
|
||||
|
@ -311,21 +307,19 @@ STATUS QuantFilter(const ParamValueLitePtr &weight, const std::shared_ptr<Primit
|
|||
if (bitNum > 0 && bitNum < 8) {
|
||||
std::vector<uint8_t> pack_data{};
|
||||
BitPack::BitPacking<T, uint8_t>(bitNum, data, &pack_data);
|
||||
auto ret = memcpy_s(raw_datas, weight->tensor_size(), pack_data.data(), pack_data.size() * sizeof(uint8_t));
|
||||
if (ret != EOK) {
|
||||
MS_LOG(ERROR) << "PostBitPack memcpy_s qDatas_packed failed";
|
||||
auto status = UpdateTensorDataAndSize(weight, pack_data.data(), pack_data.size() * sizeof(uint8_t));
|
||||
if (status != RET_OK) {
|
||||
MS_LOG(ERROR) << "UpdateTensorDataAndSize error";
|
||||
return RET_ERROR;
|
||||
}
|
||||
weight->set_tensor_size(pack_data.size() * sizeof(uint8_t));
|
||||
} else if (bitNum > 8 && bitNum < 16) {
|
||||
std::vector<uint16_t> pack_data{};
|
||||
BitPack::BitPacking<T, uint16_t>(bitNum, data, &pack_data);
|
||||
auto ret = memcpy_s(raw_datas, weight->tensor_size(), pack_data.data(), pack_data.size() * sizeof(uint16_t));
|
||||
if (ret != EOK) {
|
||||
MS_LOG(ERROR) << "PostBitPack memcpy_s qDatas_packed failed";
|
||||
auto status = UpdateTensorDataAndSize(weight, pack_data.data(), pack_data.size() * sizeof(uint16_t));
|
||||
if (status != RET_OK) {
|
||||
MS_LOG(ERROR) << "UpdateTensorDataAndSize error";
|
||||
return RET_ERROR;
|
||||
}
|
||||
weight->set_tensor_size(pack_data.size() * sizeof(uint16_t));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -336,7 +330,7 @@ STATUS QuantFilter(const ParamValueLitePtr &weight, const std::shared_ptr<Primit
|
|||
if (quantType == QuantType_PostTraining) {
|
||||
primitive_c->AddInputQuantParam(quant_params);
|
||||
} else {
|
||||
primitive_c->set_input_quant_param(WEIGHT_INDEX, quant_params);
|
||||
primitive_c->set_input_quant_param(index, quant_params);
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
@ -347,8 +341,7 @@ schema::PrimitiveType NodePrimitiveType(const CNodePtr &cnode);
|
|||
|
||||
STATUS ParseConfigFile(std::string config_file, PostQuantConfig *post_quant_config);
|
||||
|
||||
session::LiteSession *CreateSessionByFuncGraph(const FuncGraphPtr &func_graph, const converter::Flags &flags,
|
||||
int thread_num);
|
||||
SessionModel CreateSessionByFuncGraph(const FuncGraphPtr &func_graph, const converter::Flags &flags, int thread_num);
|
||||
|
||||
STATUS CollectCalibInputs(const std::vector<std::string> &input_dirs, size_t count_limited,
|
||||
std::vector<std::vector<std::string>> *inputs);
|
||||
|
@ -359,6 +352,5 @@ STATUS CopyInputDataToTensor(size_t input_index, size_t image_index,
|
|||
FuncGraphPtr CopyFuncGraph(const FuncGraphPtr &);
|
||||
|
||||
void GetLiteParameter(const AnfNodePtr &node, ParameterPtr *param_node, ParamValueLitePtr *param_value);
|
||||
|
||||
} // namespace mindspore::lite::quant
|
||||
#endif
|
||||
|
|
|
@ -84,7 +84,13 @@ WeightQuantizer::WeightQuantizer(FuncGraphPtr graph, const std::string &config_f
|
|||
}
|
||||
}
|
||||
|
||||
WeightQuantizer::~WeightQuantizer() { delete fp32_session_; }
|
||||
// Destructor: deletes every tensor stored in fp32_output_tensors_ (one name -> tensor map per
// calibration image).
WeightQuantizer::~WeightQuantizer() {
  for (const auto &outputs_of_image : fp32_output_tensors_) {
    for (const auto &name_and_tensor : outputs_of_image) {
      delete name_and_tensor.second;
    }
  }
}
|
||||
|
||||
STATUS WeightQuantizer::SetAbstract(ParamValueLitePtr param_value, ParameterPtr param_node,
|
||||
std::shared_ptr<PrimitiveC> primitive_c) {
|
||||
|
@ -278,11 +284,11 @@ STATUS WeightQuantizer::DoLstmQuntize(CNodePtr cnode) {
|
|||
}
|
||||
auto status = RET_ERROR;
|
||||
if (type_id_ == kNumberTypeInt8) {
|
||||
status =
|
||||
QuantFilter<int8_t>(param_value, primitive_c, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, false);
|
||||
status = QuantFilter<int8_t>(param_value, primitive_c, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_,
|
||||
false, 2);
|
||||
} else if (type_id_ == kNumberTypeInt16) {
|
||||
status =
|
||||
QuantFilter<int16_t>(param_value, primitive_c, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, false);
|
||||
status = QuantFilter<int16_t>(param_value, primitive_c, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_,
|
||||
false, 2);
|
||||
}
|
||||
if (status != RET_OK) {
|
||||
MS_LOG(ERROR) << "QuantFilter failed : " << status;
|
||||
|
@ -438,15 +444,73 @@ float CompareOutputData(const std::unordered_map<std::string, mindspore::tensor:
|
|||
return total_mean_error / tensor_cnt;
|
||||
}
|
||||
|
||||
STATUS WeightQuantizer::DoMiexedQuant(FuncGraphPtr func_graph) {
|
||||
// Runs the un-quantized (fp32) graph once per calibration image and stores a copy of every
// output tensor in fp32_output_tensors_[image_index][output_name]. The copies are released by
// the destructor.
//
// Parameters:
//   func_graph - graph used to create the temporary fp32 session.
//
// Returns RET_OK on success and RET_ERROR when there is no calibration input, the configured
// input_shapes count does not match the image count, the session cannot be created, or
// Resize / input copy / RunGraph / tensor copy fails.
//
// The temporary session and model are held in unique_ptr so they are released on every exit
// path, including the case where only one of the two was created.
STATUS WeightQuantizer::RunFp32Graph(FuncGraphPtr func_graph) {
  // images_.at(0) would throw on an empty list; report it as a normal error instead.
  if (images_.empty()) {
    MS_LOG(ERROR) << "calibration images is empty";
    return RET_ERROR;
  }
  auto image_cnt = images_.at(0).size();
  if (!config_param_.input_shapes.empty() && config_param_.input_shapes.size() != image_cnt) {
    MS_LOG(ERROR) << "input_shapes size: " << config_param_.input_shapes.size() << " image_cnt: " << image_cnt;
    return RET_ERROR;
  }

  // 0.1 Create Fp32 Session
  flags.quantType = schema::QuantType_QUANT_NONE;
  auto fp32_sm = CreateSessionByFuncGraph(func_graph, flags, config_param_.thread_num);
  // Declared model-first so that, on scope exit, the session is destroyed before the model
  // (same order as the explicit deletes this replaces).
  std::unique_ptr<Model> fp32_model(fp32_sm.model);
  std::unique_ptr<session::LiteSession> fp32_session(fp32_sm.session);
  if (fp32_session == nullptr || fp32_model == nullptr) {
    MS_LOG(ERROR) << "CreateSessoin fail";
    return RET_ERROR;
  }

  auto fp32_inputs = fp32_session->GetInputs();
  fp32_output_tensors_.resize(image_cnt);
  // 0.3 save fp32 output
  for (size_t i = 0; i < image_cnt; i++) {
    // Optional per-image input shapes from the config file.
    if (!config_param_.input_shapes.empty()) {
      auto status = fp32_session->Resize(fp32_inputs, {config_param_.input_shapes[i]});
      if (status != RET_OK) {
        MS_LOG(ERROR) << "session Resize fail";
        return RET_ERROR;
      }
    }
    // Fill every model input with the i-th calibration sample.
    for (size_t input_index = 0; input_index < fp32_inputs.size(); input_index++) {
      auto status = CopyInputDataToTensor(input_index, i, images_, fp32_inputs[input_index]);
      if (status != RET_OK) {
        MS_LOG(ERROR) << "generate input data from images failed!";
        return RET_ERROR;
      }
    }
    auto status = fp32_session->RunGraph();
    if (status != RET_OK) {
      MS_LOG(ERROR) << "RunGraph fail";
      return RET_ERROR;
    }
    // Copy the outputs: the session (and the tensors it owns) is gone once this function returns.
    auto fp32_outputs = fp32_session->GetOutputs();
    for (const auto &kv : fp32_outputs) {
      auto *tensor = kv.second;
      auto *lite_tensor = reinterpret_cast<lite::Tensor *>(tensor);
      if (lite_tensor == nullptr) {
        MS_LOG(ERROR) << "not lite tensor";
        return RET_ERROR;
      }
      auto *new_tensor = Tensor::CopyTensor(*lite_tensor, true);
      // NOTE(review): presumably CopyTensor reports failure by returning nullptr — confirm.
      // The check keeps a null tensor from being stored.
      if (new_tensor == nullptr) {
        MS_LOG(ERROR) << "copy output tensor fail";
        return RET_ERROR;
      }
      fp32_output_tensors_[i][kv.first] = new_tensor;
    }
  }
  return RET_OK;
}
|
||||
|
||||
STATUS WeightQuantizer::DoMiexedQuant(FuncGraphPtr func_graph) {
|
||||
// 0.2 Parse input calib files
|
||||
auto status = CollectCalibInputs(config_param_.image_paths, config_param_.batch_count, &images_);
|
||||
if (status != RET_OK) {
|
||||
|
@ -454,6 +518,12 @@ STATUS WeightQuantizer::DoMiexedQuant(FuncGraphPtr func_graph) {
|
|||
return RET_ERROR;
|
||||
}
|
||||
|
||||
MS_LOG(DEBUG) << "run fp32 model";
|
||||
status = RunFp32Graph(func_graph);
|
||||
if (status != RET_OK) {
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
auto cnodes = func_graph->GetOrderedCnodes();
|
||||
for (auto &cnode : cnodes) {
|
||||
auto op_type = NodePrimitiveType(cnode);
|
||||
|
@ -471,6 +541,13 @@ STATUS WeightQuantizer::DoMiexedQuant(FuncGraphPtr func_graph) {
|
|||
}
|
||||
}
|
||||
}
|
||||
auto image_cnt = images_.at(0).size();
|
||||
if (!config_param_.input_shapes.empty()) {
|
||||
if (config_param_.input_shapes.size() != image_cnt) {
|
||||
MS_LOG(ERROR) << "input_shapes size: " << config_param_.input_shapes.size() << " image_cnt: " << image_cnt;
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
for (auto iter = cnodes.end(); iter != cnodes.begin();) {
|
||||
auto cnode = *(--iter);
|
||||
|
@ -540,66 +617,58 @@ STATUS WeightQuantizer::DoMiexedQuant(FuncGraphPtr func_graph) {
|
|||
// 2. evaluate the quant
|
||||
// 2.1 create quant session, get input, output tensor
|
||||
flags.quantType = schema::QuantType_WeightQuant;
|
||||
auto quant_session =
|
||||
std::unique_ptr<session::LiteSession>(CreateSessionByFuncGraph(func_graph, flags, config_param_.thread_num));
|
||||
auto quant_sm = CreateSessionByFuncGraph(func_graph, flags, config_param_.thread_num);
|
||||
auto quant_session = std::unique_ptr<session::LiteSession>(quant_sm.session);
|
||||
if (quant_session == nullptr) {
|
||||
MS_LOG(ERROR) << "create session error: " << status;
|
||||
delete quant_sm.model;
|
||||
return RET_ERROR;
|
||||
}
|
||||
auto quant_inputs = quant_session->GetInputs();
|
||||
|
||||
auto mean_error = 0.0f;
|
||||
if (fp32_inputs.size() != images_.size()) {
|
||||
MS_LOG(ERROR) << "model's input tensor cnt: " << fp32_inputs.size() << " != " << images_.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
auto image_cnt = images_.at(0).size();
|
||||
for (size_t i = 0; i < image_cnt; i++) {
|
||||
// set multi-input data
|
||||
for (size_t input_index = 0; input_index < fp32_inputs.size(); input_index++) {
|
||||
status = CopyInputDataToTensor(input_index, i, images_, fp32_inputs[input_index]);
|
||||
if (!config_param_.input_shapes.empty()) {
|
||||
status = quant_session->Resize(quant_inputs, {config_param_.input_shapes[i]});
|
||||
if (status != RET_OK) {
|
||||
MS_LOG(ERROR) << "generate input data from images failed!";
|
||||
MS_LOG(ERROR) << "session Resize fail";
|
||||
delete quant_sm.model;
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
// set multi-input data
|
||||
for (size_t input_index = 0; input_index < quant_inputs.size(); input_index++) {
|
||||
status = CopyInputDataToTensor(input_index, i, images_, quant_inputs[input_index]);
|
||||
if (status != RET_OK) {
|
||||
MS_LOG(ERROR) << "generate input data from images failed!";
|
||||
delete quant_sm.model;
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
std::future<STATUS> fp32_inference = std::async(
|
||||
std::launch::async, [](session::LiteSession *fp32_session) -> STATUS { return fp32_session->RunGraph(); },
|
||||
fp32_session_);
|
||||
|
||||
status = quant_session->RunGraph();
|
||||
if (status != RET_OK) {
|
||||
MS_LOG(ERROR) << "quant session run error";
|
||||
return RET_ERROR;
|
||||
}
|
||||
status = fp32_inference.get();
|
||||
if (status != RET_OK) {
|
||||
MS_LOG(ERROR) << "fp32 session run error";
|
||||
delete quant_sm.model;
|
||||
return RET_ERROR;
|
||||
}
|
||||
// 3. compare between quant and fp32
|
||||
auto fp32_outputs = fp32_session_->GetOutputs();
|
||||
auto quant_outputs = quant_session->GetOutputs();
|
||||
mean_error += CompareOutputData<float>(fp32_outputs, quant_outputs);
|
||||
mean_error += CompareOutputData<float>(fp32_output_tensors_[i], quant_outputs);
|
||||
} // end_for: calib data loop
|
||||
delete quant_sm.model;
|
||||
mean_error = mean_error / image_cnt;
|
||||
|
||||
if (mean_error <= config_param_.mean_error_threshold) {
|
||||
MS_LOG(DEBUG) << "op: " << op_name << " got mixed bit: " << bit_num_t << " mean_error: " << mean_error;
|
||||
opname_bit_[op_name] = bit_num_t;
|
||||
break;
|
||||
} else if (bit_num_t != 8) {
|
||||
MS_LOG(DEBUG) << "op: " << op_name << " intermediate bit: " << bit_num_t << " mean_error: " << mean_error
|
||||
<< " [recover]";
|
||||
// recover
|
||||
param_value->set_tensor_size(sizeof(float) * elem_count);
|
||||
ret = memcpy_s(raw_data, param_value->tensor_size(), origin_data, sizeof(float) * elem_count);
|
||||
if (ret != EOK) {
|
||||
MS_LOG(ERROR) << "memcpy fail: "
|
||||
<< " src size: " << sizeof(float) * elem_count << " dst size: " << param_value->tensor_size();
|
||||
status = UpdateTensorDataAndSize(param_value, origin_data, sizeof(float) * elem_count);
|
||||
if (status != RET_OK) {
|
||||
MS_LOG(ERROR) << "UpdateTensorDataAndSize fail";
|
||||
return RET_ERROR;
|
||||
}
|
||||
} else {
|
||||
|
@ -610,6 +679,9 @@ STATUS WeightQuantizer::DoMiexedQuant(FuncGraphPtr func_graph) {
|
|||
free(origin_data);
|
||||
} // if: conv and matmul
|
||||
} // end loop: all cnode
|
||||
for (const auto &kv : opname_bit_) {
|
||||
MS_LOG(INFO) << "op: " << kv.first << " bit:" << kv.second;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
|
||||
#include <future>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
#include <map>
|
||||
#include <list>
|
||||
#include <string>
|
||||
|
@ -59,11 +60,12 @@ class WeightQuantizer : public Quantizer {
|
|||
std::string config_file_;
|
||||
PostQuantConfig config_param_;
|
||||
std::vector<std::vector<std::string>> images_; // multi_input, [[mode_input_0], [model_input_1]...]
|
||||
session::LiteSession *fp32_session_ = nullptr;
|
||||
std::vector<std::unordered_map<std::string, mindspore::tensor::MSTensor *>> fp32_output_tensors_;
|
||||
|
||||
STATUS DoMiexedQuant(FuncGraphPtr);
|
||||
STATUS SetAbstract(ParamValueLitePtr param_value, ParameterPtr param_node, std::shared_ptr<PrimitiveC> primitive_c);
|
||||
STATUS DoFixedQuant(FuncGraphPtr);
|
||||
STATUS RunFp32Graph(FuncGraphPtr);
|
||||
};
|
||||
} // namespace mindspore::lite::quant
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue