Merge quant util of converter and runtime, and save quantized models

Emir Haleva 2021-05-02 17:25:06 +03:00
parent 9416502e90
commit 5251f59f27
18 changed files with 607 additions and 802 deletions

View File

@ -49,6 +49,7 @@ set(LITE_SRC
${CMAKE_CURRENT_SOURCE_DIR}/common/prim_util.cc
${CMAKE_CURRENT_SOURCE_DIR}/common/tensor_util.cc
${CMAKE_CURRENT_SOURCE_DIR}/common/loader_util.cc
${CMAKE_CURRENT_SOURCE_DIR}/common/quant_utils.cc
${CMAKE_CURRENT_SOURCE_DIR}/runtime/allocator.cc
${CMAKE_CURRENT_SOURCE_DIR}/runtime/runtime_api.cc
${CMAKE_CURRENT_SOURCE_DIR}/runtime/thread_pool.c
@ -124,6 +125,7 @@ if(SUPPORT_TRAIN)
${CMAKE_CURRENT_SOURCE_DIR}/train/accuracy_monitor.cc
${CMAKE_CURRENT_SOURCE_DIR}/train/classification_train_accuracy_monitor.cc
${CMAKE_CURRENT_SOURCE_DIR}/train/train_export.cc
${CMAKE_CURRENT_SOURCE_DIR}/../tools/common/storage.cc
)
if(ENABLE_V0)
set(LITE_SRC
@ -192,7 +194,10 @@ if(BUILD_MINDDATA STREQUAL "lite")
target_link_libraries(mindspore-lite_static minddata_eager_mid)
endif()
if(SUPPORT_TRAIN)
add_dependencies(mindspore-lite fbs_inner_src)
add_dependencies(mindspore-lite_static fbs_inner_src)
target_link_libraries(mindspore-lite minddata-lite)
target_link_libraries(mindspore-lite_static minddata-lite)
endif()

View File

@ -0,0 +1,104 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "schema/inner/model_generated.h"
#include "src/common/quant_utils.h"
#include "src/lite_kernel.h"
namespace mindspore {
namespace lite {
void GetMaxMinPerchannel(int channels, int one_filter_size, int i, int elem_count, const float *raw_datas,
bool channel_at_first, float *desired_max, float *desired_min) {
float min = FLT_MAX;
float max = -FLT_MAX;
// find min and max
for (int j = 0; j < one_filter_size; j++) {
auto index = j + i * one_filter_size;
if (!channel_at_first) {
index = j * channels + i;
}
if (index >= elem_count) {
MS_LOG(ERROR) << "over flow!";
}
min = std::min(min, raw_datas[index]);
max = std::max(max, raw_datas[index]);
}
*desired_max = max;
*desired_min = min;
}
STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange, int quant_max,
int quant_min, int num_bits) {
MS_ASSERT(quantParam != nullptr);
if (mMin > 0.0f) {
MS_LOG(DEBUG) << "min " << mMin << " is bigger then 0, set to 0, this may course low precision";
mMin = 0.0f;
}
if (mMax < 0.0f) {
MS_LOG(DEBUG) << "mMax " << mMax << " is smaller than 0, set to 0, this may course low precision";
mMax = 0.0f;
}
if (mMin > mMax) {
MS_LOG(ERROR) << "cal error while min" << mMin << ">" << mMax;
return RET_PARAM_INVALID;
}
if (mMin == mMax) {
if (mMin != 0.0f) {
MS_LOG(ERROR) << "min and max should both be zero if they are equal to each other";
return RET_ERROR;
}
quantParam->inited = true;
quantParam->min = mMin;
quantParam->max = mMax;
quantParam->scale = 0.0f;
quantParam->zeroPoint = 0;
quantParam->narrowRange = narrowRange;
quantParam->numBits = num_bits;
return RET_OK;
}
auto quantMinFloat = static_cast<double>(quant_min);
auto quantMaxFloat = static_cast<double>(quant_max);
if (fabs(quantMaxFloat - quantMinFloat) <= 0.0f) {
MS_LOG(ERROR) << "divisor cannot be 0";
return RET_ERROR;
}
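// Affine quantization: q = round(r / scale + zeroPoint) and r = scale * (q - zeroPoint),
// so scale spreads the real range [mMin, mMax] across the integer range [quant_min, quant_max].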
double scale = (mMax - mMin) / (quantMaxFloat - quantMinFloat);
if (fabs(scale) <= 0.0f) {
MS_LOG(ERROR) << "divisor 'scale' cannot be 0";
return RET_ERROR;
}
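// Choose the zero point so that the real value mMin maps exactly onto quant_min.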
const double zeroPointFromMin = quantMinFloat - mMin / scale;
int zeroPoint = static_cast<int32_t>(std::round(zeroPointFromMin));
// The zero point should always be in the range of quantized value,
// [qmin, qmax].
MS_ASSERT(zeroPoint >= quant_min);
MS_ASSERT(zeroPoint <= quant_max);
quantParam->inited = true;
quantParam->min = mMin;
quantParam->max = mMax;
quantParam->scale = scale;
quantParam->zeroPoint = zeroPoint;
quantParam->narrowRange = narrowRange;
quantParam->numBits = num_bits;
return RET_OK;
}
} // namespace lite
} // namespace mindspore

View File

@ -0,0 +1,234 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_COMMON_QUANT_UTILS_H_
#define MINDSPORE_LITE_SRC_COMMON_QUANT_UTILS_H_
#include <float.h>
#include <cmath>
#include <climits>
#include <limits>
#include <algorithm>
#include <vector>
#include "include/errorcode.h"
#include "src/common/log_adapter.h"
#include "ir/dtype/type_id.h"
namespace mindspore {
namespace schema {
struct QuantParamT;
}
namespace lite {
const int RET_QUANT_CONTINUE = 2;
static constexpr double SCALE_THREASHOLD = 1e-38;
static constexpr int kPerTensor = 1;
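// Quantized integer range for a given bit width: signed types use [-2^(bits-1), 2^(bits-1) - 1],
// unsigned types use [0, 2^bits - 1].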
inline int QuantMax(int bits, TypeId type) {
if (type == kNumberTypeInt8) {
return (1 << (bits - 1)) - 1;
} else if (type == kNumberTypeUInt8) {
return (1 << bits) - 1;
}
return 0;
}
inline int QuantMin(int bits, TypeId type) {
if (type == kNumberTypeInt8) {
return -(1 << (bits - 1));
}
return 0;
}
void GetMaxMinPerchannel(int channels, int one_filter_size, int i, int elem_count, const float *raw_datas,
bool channel_at_first, float *desired_max, float *desired_min);
STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange, int quant_max,
int quant_min, int num_bits);
template <typename T>
T QuantizeData(const float originData, const schema::QuantParamT *quantParam) {
MS_ASSERT(quantParam != nullptr);
MS_ASSERT(quantParam->inited);
const auto scale = quantParam->scale;
const auto zeroPoint = quantParam->zeroPoint;
const auto numBit = quantParam->numBits;
const auto narrowRange = quantParam->narrowRange;
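// Clamp bounds expressed in the real domain: inputs outside [minLimit, maxLimit] would map
// outside the representable quantized range of type T after rounding.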
double maxLimitTemp = static_cast<float>((1 << (unsigned int)numBit) - 1);
const double maxLimit = static_cast<float>(maxLimitTemp - zeroPoint + std::numeric_limits<T>::min()) * scale;
double minLimit;
if (narrowRange) {
minLimit = static_cast<float>(std::numeric_limits<T>::min() + 1 - zeroPoint) * scale;
} else {
minLimit = static_cast<float>(std::numeric_limits<T>::min() - zeroPoint) * scale;
}
return [maxLimit, minLimit, zeroPoint, scale, narrowRange, originData] {
double tmp;
if (originData > maxLimit) {
tmp = maxLimit;
} else if (originData < minLimit) {
tmp = minLimit;
} else {
tmp = originData;
}
auto quantData = static_cast<T>(std::round(zeroPoint + tmp / scale));
return quantData;
}();
}
template <typename T>
T QuantizeData(float originData, const schema::QuantParamT *quantParam, int quant_max, int quant_min) {
MS_ASSERT(quantParam != nullptr);
MS_ASSERT(quantParam->inited);
const auto scale = quantParam->scale;
const int zeroPoint = quantParam->zeroPoint;
const int maxLimit = quant_max;
const int minLimit = quant_min;
if (scale <= SCALE_THREASHOLD) {
return 0;
}
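// Quantize with saturation: q = clamp(round(r / scale + zeroPoint), quant_min, quant_max).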
return [maxLimit, minLimit, zeroPoint, scale, originData] {
auto quant_data = std::round(originData / scale + zeroPoint);
if (quant_data > maxLimit) {
quant_data = maxLimit;
} else if (quant_data < minLimit) {
quant_data = minLimit;
}
return static_cast<T>(quant_data);
}();
}
template <typename T>
STATUS DoPerLayerQuant(const float *raw_datas, size_t elem_count, std::vector<schema::QuantParamT> *quant_params,
const int &quant_max, const int &quant_min, const size_t &bit_num, const bool &k_means,
std::vector<T> *quant_datas) {
float min = FLT_MAX;
float max = -FLT_MAX;
for (uint32_t i = 0; i < elem_count; i++) {
min = std::min(min, raw_datas[i]);
max = std::max(max, raw_datas[i]);
}
schema::QuantParamT quant_param;
if (!k_means) {
STATUS status = CalQuantizationParams(&quant_param, min, max, false, quant_max, quant_min, bit_num);
if (status != RET_OK) {
MS_LOG(ERROR) << "CalQuantizationParams failed" << status;
return status;
}
}
quant_params->emplace_back(quant_param);
// update data and datatype
for (uint32_t i = 0; i < elem_count; i++) {
float raw_data = raw_datas[i];
if (!k_means) {
auto quant_data = QuantizeData<T>(raw_data, &quant_param, quant_max, quant_min);
(*quant_datas)[i] = quant_data;
}
}
return RET_OK;
}
template <typename T>
STATUS DoPerChannelQuant(const float *raw_datas, size_t elem_count, const schema::QuantType &quant_type,
std::vector<schema::QuantParamT> *quant_params, const int &quant_max, const int &quant_min,
const size_t &bit_num, const bool &k_means, std::vector<T> *quant_datas, int channels,
bool channel_at_first = true) {
static const int quant_param_size = 32 * 8;
std::vector<float> dequant_datas(quant_datas->size());
if (channels <= 0) {
MS_LOG(ERROR) << "channels must be greater than 0";
return RET_ERROR;
}
size_t one_filter_size = elem_count / channels;
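// Heuristic: skip weight quantization when a filter is so small that storing its per-channel
// quantization parameters would cost more than the bits saved on its elements.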
bool do_quant = quant_param_size / (sizeof(float) * 8 - bit_num) < one_filter_size;
if (!do_quant && quant_type == schema::QuantType_WeightQuant) {
MS_LOG(INFO) << "too few elements in a filter, no need to quantize. " << one_filter_size;
return RET_QUANT_CONTINUE;
}
for (int i = 0; i < channels; i++) {
float min = FLT_MAX;
float max = -FLT_MAX;
GetMaxMinPerchannel(channels, one_filter_size, i, elem_count, raw_datas, channel_at_first, &max, &min);
schema::QuantParamT quant_param;
STATUS status = CalQuantizationParams(&quant_param, min, max, false, quant_max, quant_min, bit_num);
if (status != RET_OK) {
MS_LOG(ERROR) << "CalQuantizationParams failed" << status;
return status;
}
// do quantization
double average_dequant = 0;
double average_raw = 0;
for (uint32_t j = 0; j < one_filter_size; j++) {
auto index = j + i * one_filter_size;
if (!channel_at_first) {
index = j * channels + i;
}
MS_ASSERT(index < elem_count);
float raw_data = raw_datas[index];
auto quant_data = QuantizeData<T>(raw_data, &quant_param, quant_max, quant_min);
(*quant_datas)[index] = quant_data;
if (quant_type == schema::QuantType_WeightQuant) {
float dequant_data = quant_param.scale * (quant_data - quant_param.zeroPoint);
dequant_datas[index] = dequant_data;
average_dequant += dequant_data;
average_raw += raw_data;
}
}
if (quant_type == schema::QuantType_WeightQuant && !k_means) {
// mean
average_dequant = average_dequant / one_filter_size;
average_raw = average_raw / one_filter_size;
// std
double variance_dequant = 0;
double variance_raw = 0;
for (uint32_t j = 0; j < one_filter_size; j++) {
auto index = j + i * one_filter_size;
if (!channel_at_first) {
index = j * channels + i;
}
MS_ASSERT(index < elem_count);
variance_dequant += std::pow(dequant_datas[index] - average_dequant, 2);
variance_raw += std::pow(raw_datas[index] - average_raw, 2);
}
variance_dequant = std::sqrt(variance_dequant / one_filter_size);
variance_raw = std::sqrt(variance_raw / one_filter_size);
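// varCorr and meanCorr record a per-channel linear correction so that the dequantized weights'
// standard deviation and mean match those of the original float weights.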
quant_param.varCorr = 1;
if (variance_raw != 0 && variance_dequant != 0) {
auto temp_var_corr = variance_raw / variance_dequant;
if (temp_var_corr > 0 && temp_var_corr < 10) {
quant_param.varCorr = temp_var_corr;
} else {
MS_LOG(WARNING) << "unexpected var_corr: " << temp_var_corr;
}
}
quant_param.meanCorr = average_raw - average_dequant * quant_param.varCorr;
}
quant_params->emplace_back(quant_param);
}
return RET_OK;
}
} // namespace lite
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_COMMON_QUANT_UTILS_H_

View File

@ -23,46 +23,93 @@
#include <set>
#include "schema/inner/model_generated.h"
#include "src/train/train_utils.h"
#include "src/common/quant_utils.h"
#include "tools/common/storage.h"
namespace mindspore {
namespace lite {
std::vector<uint8_t> TrainExport::CreateData(const mindspore::lite::Tensor *tensor) {
std::vector<uint8_t> TrainExport::CreateData(const lite::Tensor *tensor) {
uint8_t *tensor_data = reinterpret_cast<uint8_t *>(tensor->data_c());
auto size = tensor->Size();
std::vector<uint8_t> data(tensor_data, tensor_data + size);
return data;
}
bool TrainExport::NeedQuantization(const lite::Tensor *tensor) {
return (tensor->quant_params().size() > 0 && tensor->quant_params().at(0).inited);
}
schema::QuantType TrainExport::GetNodeQuantType(const kernel::LiteKernel *kernel) {
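// A node is treated as weight-quantized when any of its constant input tensors already
// carries an initialized quantization parameter.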
if (std::any_of(kernel->in_tensors().cbegin(), kernel->in_tensors().cend(), [](const lite::Tensor *t) {
return (t->IsConst() && (t->quant_params().size() > 0) && (t->quant_params().at(0).inited));
})) {
return schema::QuantType_QUANT_WEIGHT;
}
return schema::QuantType_QUANT_NONE;
}
int TrainExport::QuantTensorData(schema::TensorT *dest_tensor, const lite::Tensor *src_tensor) {
int channels = src_tensor->quant_params().size();
if (channels < 1) {
MS_LOG(ERROR) << "Quant Params is empty";
return RET_ERROR;
}
int bit_num = src_tensor->quant_params().at(0).bitNum;
int quant_max = QuantMax(bit_num, kNumberTypeInt8);
int quant_min = QuantMin(bit_num, kNumberTypeInt8);
std::vector<int8_t> data(src_tensor->ElementsNum());
std::vector<schema::QuantParamT> quant_params;
STATUS ret = RET_OK;
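// A single quant param means per-tensor (per-layer) quantization; several params mean
// per-channel quantization, one parameter set per channel.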
if (channels == kPerTensor) {
ret = DoPerLayerQuant<int8_t>(reinterpret_cast<float *>(src_tensor->data_c()), src_tensor->ElementsNum(),
&(quant_params), quant_max, quant_min, bit_num, false, &data);
} else {
bool channel_at_first = (src_tensor->shape().at(0) == channels);
ret = DoPerChannelQuant<int8_t>(reinterpret_cast<float *>(src_tensor->data_c()), src_tensor->ElementsNum(),
schema::QuantType_WeightQuant, &(quant_params), quant_max, quant_min, bit_num,
false, &data, channels, channel_at_first);
}
if (ret == RET_QUANT_CONTINUE) {
MS_LOG(DEBUG) << "No Need to quant per channel";
return RET_OK;
}
if (ret == RET_ERROR) {
MS_LOG(ERROR) << "QuantTensorData error, channels = " << channels;
return ret;
}
if (quant_params.empty()) {
MS_LOG(ERROR) << "quant_params empty";
return RET_ERROR;
}
dest_tensor->data = std::vector<uint8_t>(data.data(), data.data() + data.size());
dest_tensor->dataType = kNumberTypeInt8;
dest_tensor->quantParams.clear();
for (auto quant_param : quant_params) {
dest_tensor->quantParams.emplace_back(std::make_unique<schema::QuantParamT>(quant_param));
}
return RET_OK;
}
std::unique_ptr<schema::TensorT> TrainExport::CreateTensor(const mindspore::lite::Tensor *tensor,
schema::Tensor *scTensor) {
auto tensorT = std::make_unique<schema::TensorT>();
tensorT->nodeType = scTensor->nodeType();
tensorT->dataType = tensor->data_type();
tensorT->dims = tensor->shape();
tensorT->format = tensor->format();
tensorT->name = tensor->tensor_name();
tensorT->refCount = 0;
tensorT->offset = 0;
tensorT->dataType = tensor->data_type();
tensorT->enableHuffmanCode = false;
if ((tensorT->nodeType == NodeType_ValueNode) && (scTensor->data() != nullptr) && (scTensor->data()->size() > 0)) {
tensorT->data = CreateData(tensor);
}
for (auto quant_param : tensor->quant_params()) {
auto quantParamT = std::make_unique<schema::QuantParamT>();
quantParamT->scale = quant_param.scale;
quantParamT->zeroPoint = quant_param.zeroPoint;
quantParamT->min = 0;
quantParamT->max = 0;
quantParamT->narrowRange = true;
quantParamT->numBits = quant_param.bitNum;
quantParamT->inited = quant_param.inited;
quantParamT->varCorr = quant_param.var_corr;
quantParamT->meanCorr = quant_param.mean_corr;
quantParamT->dstDtype = quant_param.dstDtype;
quantParamT->roundType = quant_param.roundType;
quantParamT->multiplier = quant_param.multiplier;
tensorT->quantParams.emplace_back(std::move(quantParamT));
if (NeedQuantization(tensor)) {
QuantTensorData(tensorT.get(), tensor);
} else {
tensorT->data = CreateData(tensor);
}
}
tensorT->quantClusters = tensor->quant_clusters();
return tensorT;
@ -85,7 +132,7 @@ std::unique_ptr<schema::CNodeT> TrainExport::CreateCNode(const mindspore::kernel
cnodeT->inputIndex = inputIndex;
cnodeT->outputIndex = outputIndex;
cnodeT->name = kernel->name();
cnodeT->quantType = schema::QuantType_QUANT_NONE;
cnodeT->quantType = GetNodeQuantType(kernel);
// find kernel in model
auto *node = FindNode(kernel);
if (node == nullptr) {
@ -132,7 +179,6 @@ int TrainExport::Export(const std::vector<mindspore::kernel::LiteKernel *> &kern
MS_LOG(ERROR) << "cannot find tensor " + tensor->ToString() + " in model";
return RET_ERROR;
}
out_set.insert(id);
auto it = remap.find(id);
if (it == remap.end()) {
remap[id] = tensor_idx;
@ -153,7 +199,7 @@ int TrainExport::Export(const std::vector<mindspore::kernel::LiteKernel *> &kern
schema::Tensor *scTensor = model_->all_tensors_.at(id);
auto tensorT = CreateTensor(tensor, scTensor);
// find a tensor which is not an output
if (out_set.find(id) == out_set.end()) {
if (out_set.find(remap[id]) == out_set.end()) {
if ((tensorT->nodeType == NodeType_ValueNode) && (tensorT->data.size() == 0)) {
meta_graph->inputIndex.push_back(remap[id]);
}
@ -165,7 +211,7 @@ int TrainExport::Export(const std::vector<mindspore::kernel::LiteKernel *> &kern
meta_graph->allTensors.emplace_back(std::move(tensorT));
}
auto graph = meta_graph.release();
int err = SaveToFile(graph, file_name_);
int err = Storage::Save(*graph, file_name_);
if (err != RET_OK) {
MS_LOG(ERROR) << "failed to save flatbuffer file " << file_name_;
}
@ -173,30 +219,5 @@ int TrainExport::Export(const std::vector<mindspore::kernel::LiteKernel *> &kern
return err;
}
int TrainExport::SaveToFile(const schema::MetaGraphT *graph, const std::string &outputPath) {
flatbuffers::FlatBufferBuilder builder(1024);
auto offset = schema::MetaGraph::Pack(builder, graph);
builder.Finish(offset);
schema::FinishMetaGraphBuffer(builder, offset);
int size = builder.GetSize();
auto content = builder.GetBufferPointer();
if (content == nullptr) {
MS_LOG(ERROR) << "GetBufferPointer nullptr";
return RET_ERROR;
}
if (access((outputPath + ".ms").c_str(), F_OK) == 0) {
chmod((outputPath + ".ms").c_str(), S_IWUSR);
}
std::ofstream output(outputPath + ".ms", std::ofstream::binary);
if (!output.is_open()) {
MS_LOG(ERROR) << "Can not open output file: " << outputPath << ".ms";
return RET_ERROR;
}
output.write((const char *)content, size);
output.close();
chmod((outputPath + ".ms").c_str(), S_IRUSR);
return RET_OK;
}
} // namespace lite
} // namespace mindspore

View File

@ -50,7 +50,10 @@ class TrainExport {
std::unique_ptr<schema::TensorT> CreateTensor(const mindspore::lite::Tensor *tensor, schema::Tensor *scTensor);
std::unique_ptr<schema::CNodeT> CreateCNode(const mindspore::kernel::LiteKernel *kernel,
std::vector<uint32_t> inputIndex, std::vector<uint32_t> outputIndex);
int SaveToFile(const schema::MetaGraphT *graph, const std::string &outputPath);
bool NeedQuantization(const mindspore::lite::Tensor *tensor);
virtual int QuantTensorData(schema::TensorT *dest_tensor, const mindspore::lite::Tensor *src_tensor);
mindspore::schema::QuantType GetNodeQuantType(const mindspore::kernel::LiteKernel *kernel);
};
}; // namespace lite
} // namespace mindspore

View File

@ -32,7 +32,7 @@ std::vector<bool> StringToBitVector(const std::string &str) {
}
STATUS IndexingDecompress(const schema::Tensor &src_tensor, Tensor *dst_tensor) {
MS_LOG(ERROR) << "un-index weight";
MS_LOG(DEBUG) << "un-index weight";
auto bit_num = src_tensor.quantParams()->Get(0)->numBits();
std::string str(reinterpret_cast<const char *>(src_tensor.data()->data()), src_tensor.data()->size());

View File

@ -159,6 +159,7 @@ set(TEST_LITE_SRC
${LITE_DIR}/src/common/utils.cc
${LITE_DIR}/src/common/loader_util.cc
${LITE_DIR}/src/common/string_util.cc
${LITE_DIR}/src/common/quant_utils.cc
${LITE_DIR}/tools/common/flag_parser.cc
${LITE_DIR}/tools/benchmark/benchmark.cc
${LITE_DIR}/test/st/benchmark_test.cc
@ -297,6 +298,7 @@ if(SUPPORT_TRAIN)
${LITE_DIR}/src/train/train_utils.cc
${LITE_DIR}/src/train/transfer_session.cc
${LITE_DIR}/src/lite_session.cc
${LITE_DIR}/tools/common/storage.cc
)
else()
set(TEST_LITE_SRC
@ -388,6 +390,10 @@ endif()
add_executable(lite-test ${TEST_SRC})
add_dependencies(lite-test fbs_src)
if(SUPPORT_TRAIN)
add_dependencies(lite-test fbs_inner_src)
endif()
target_link_libraries(lite-test dl mindspore::gtest)
if(PLATFORM_ARM AND ENABLE_FP16)

View File

@ -11,7 +11,7 @@ googlenet
densenet
shufflenetv2
mini_alexnet weight_quant 2
nin weight_quant 7
nin weight_quant 9
lenet weight_quant 5
mobilenetv1 weight_quant 2
mobilenetv2 weight_quant 2

View File

@ -82,22 +82,27 @@ function Run_x86() {
model_prefix=${line_array[0]}
model_name=${line_array[0]}'_train'
accuracy_limit=0.5
export_file=""
inference_file=""
if [[ $model_name == \#* ]]; then
continue
fi
if [[ "${line_array[1]}" == "weight_quant" ]]; then
model_name=${line_array[0]}'_train_quant'
accuracy_limit=${line_array[2]}
else
export_file="${ms_models_path}/${model_name}_tod"
rm -f ${export_file}"*"
fi
if [[ "${save_lite}" == "1" ]]; then
inference_file="${ms_models_path}/${model_name}_infer"
fi
inference_file="${ms_models_path}/${model_name}_infer"
rm -f ${inference_file}"*"
echo ${model_name} >> "${run_x86_log_file}"
${run_valgrind}./tools/benchmark_train/benchmark_train \
--modelFile=${ms_models_path}/${model_name}.ms \
--inDataFile=${train_io_path}/${model_prefix}_input1.bin,${train_io_path}/${model_prefix}_input2.bin \
--inDataFile=${train_io_path}/${model_prefix}_input \
--expectedDataFile=${train_io_path}/${model_prefix}_output --epochs=${epoch_num} --numThreads=${threads} \
--accuracyThreshold=${accuracy_limit} --inferenceFile=${inference_file} >> "${run_x86_log_file}"
--accuracyThreshold=${accuracy_limit} --inferenceFile=${inference_file} \
--exportFile=${export_file} >> "${run_x86_log_file}"
if [ $? = 0 ]; then
run_result='x86: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_train_result_file}
else
@ -168,21 +173,22 @@ function Run_arm() {
model_prefix=${line_array[0]}
model_name=${line_array[0]}'_train'
accuracy_limit=0.5
export_file=""
if [[ $model_name == \#* ]]; then
continue
fi
if [[ "${line_array[1]}" == "weight_quant" ]]; then
model_name=${line_array[0]}'_train_quant'
accuracy_limit=${line_array[2]}
else
export_file="${tmp_dir}/${model_name}_tod"
fi
inference_file="${tmp_dir}/${model_name}_infer"
if [[ "${line_array[1]}" == "noarm32" ]] && [[ "$1" == arm32 ]]; then
run_result=$1': '${model_name}' irrelevant'; echo ${run_result} >> ${run_benchmark_train_result_file}
continue
fi
if [[ "${save_lite}" == "1" ]]; then
inference_file="${ms_models_path}/${model_name}_infer"
fi
# run benchmark_train test without clib data
echo ${model_name} >> "${run_arm_log_file}"
adb -s ${device_id} push ${train_io_path}/${model_prefix}_input*.bin ${train_io_path}/${model_prefix}_output*.bin /data/local/tmp/benchmark_train_test >> ${adb_push_log_file}
@ -193,15 +199,20 @@ function Run_arm() {
elif [ "$1" == arm32 ]; then
echo 'cp /data/local/tmp/arm32/libc++_shared.so ./' >> ${adb_cmd_run_file}
fi
echo "rm -f ${tmp_dir}/${model_name}_exported.ms" >> ${run_arm_log_file}
echo "rm -f ${tmp_dir}/${model_name}_exported.ms" >> ${adb_cmd_run_file}
adb -s ${device_id} shell < ${adb_cmd_run_file} >> ${run_arm_log_file}
echo "rm -f ${export_file} ${inference_file}.ms" >> ${run_arm_log_file}
echo "rm -f ${export_file} ${inference_file}.ms" >> ${adb_cmd_run_file}
adb -s ${device_id} shell < ${adb_cmd_run_file} >> ${run_arm_log_file}
adb_cmd=$(cat <<-ENDM
export LD_LIBRARY_PATH=./:/data/local/tmp/:/data/local/tmp/benchmark_train_test;./benchmark_train \
--epochs=${epoch_num} \
--modelFile=${model_name}.ms \
--inDataFile=${tmp_dir}/${model_prefix}_input1.bin,${tmp_dir}/${model_prefix}_input2.bin \
--inDataFile=${tmp_dir}/${model_prefix}_input \
--expectedDataFile=${tmp_dir}/${model_prefix}_output \
--numThreads=${threads} --accuracyThreshold=${accuracy_limit} --inferenceFile=${inference_file}
--numThreads=${threads} \
--accuracyThreshold=${accuracy_limit} \
--inferenceFile=${inference_file} \
--exportFile=${export_file}
ENDM
)
echo "${adb_cmd}" >> ${run_arm_log_file}
@ -252,7 +263,7 @@ models_mindspore_train_config=${basepath}/models_ms_train.cfg
epoch_num=1
threads=2
train_io_path=""
while getopts "r:M:c:m:d:i:e:vt:q:DF" opt; do
while getopts "r:M:c:m:d:i:e:vt:q:D" opt; do
case ${opt} in
r)
release_path=${OPTARG}
@ -295,8 +306,6 @@ while getopts "r:M:c:m:d:i:e:vt:q:DF" opt; do
epoch_num=${OPTARG}
echo "train epoch num is ${epoch_num}"
;;
F) save_lite=1
;;
?)
echo "unknown para"
exit 1;;

View File

@ -107,7 +107,7 @@ static STATUS CompressTensor(schema::TensorT *tensor_input, const std::unique_pt
int bit_num = tensor_input->quantParams.at(0)->numBits;
// Pack Repetition
auto repetition_packed = false;
MS_LOG(ERROR) << dst_node->name;
MS_LOG(DEBUG) << dst_node->name;
if (dst_node->quantType == schema::QuantType_QUANT_WEIGHT) {
if (bit_num <= 8) {
repetition_packed = PackRepetition<int8_t>(bit_num, tensor_input);

View File

@ -32,8 +32,6 @@
namespace mindspore {
namespace lite {
static const char *DELIM_COLON = ":";
static const char *DELIM_COMMA = ",";
static const char *DELIM_SLASH = "/";
namespace {
@ -81,8 +79,8 @@ int NetTrain::GenerateRandomData(size_t size, void *data) {
return RET_OK;
}
int NetTrain::GenerateInputData() {
for (auto tensor : ms_inputs_) {
int NetTrain::GenerateInputData(std::vector<mindspore::tensor::MSTensor *> *ms_inputs) {
for (auto tensor : *ms_inputs) {
MS_ASSERT(tensor != nullptr);
auto input_data = tensor->MutableData();
if (input_data == nullptr) {
@ -100,16 +98,16 @@ int NetTrain::GenerateInputData() {
return RET_OK;
}
int NetTrain::LoadInput() {
int NetTrain::LoadInput(std::vector<mindspore::tensor::MSTensor *> *ms_inputs) {
if (flags_->in_data_file_.empty()) {
auto status = GenerateInputData();
auto status = GenerateInputData(ms_inputs);
if (status != RET_OK) {
std::cerr << "Generate input data error " << status << std::endl;
MS_LOG(ERROR) << "Generate input data error " << status;
return status;
}
} else {
auto status = ReadInputFile();
auto status = ReadInputFile(ms_inputs);
if (status != RET_OK) {
std::cerr << "ReadInputFile error, " << status << std::endl;
MS_LOG(ERROR) << "ReadInputFile error, " << status;
@ -119,8 +117,8 @@ int NetTrain::LoadInput() {
return RET_OK;
}
int NetTrain::ReadInputFile() {
if (ms_inputs_.empty()) {
int NetTrain::ReadInputFile(std::vector<mindspore::tensor::MSTensor *> *ms_inputs) {
if (ms_inputs->empty()) {
return RET_OK;
}
@ -128,16 +126,12 @@ int NetTrain::ReadInputFile() {
MS_LOG(ERROR) << "Not supported image input";
return RET_ERROR;
} else {
if (ms_inputs_.size() > flags_->input_data_list_.size()) {
MS_LOG(ERROR) << "missing input files expecting " << ms_inputs_.size() << ",got "
<< flags_->input_data_list_.size();
return RET_ERROR;
}
for (size_t i = 0; i < ms_inputs_.size(); i++) {
auto cur_tensor = ms_inputs_.at(i);
for (size_t i = 0; i < ms_inputs->size(); i++) {
auto cur_tensor = ms_inputs->at(i);
MS_ASSERT(cur_tensor != nullptr);
size_t size;
char *bin_buf = ReadFile(flags_->input_data_list_[i].c_str(), &size);
std::string file_name = flags_->in_data_file_ + std::to_string(i + 1) + ".bin";
char *bin_buf = ReadFile(file_name.c_str(), &size);
if (bin_buf == nullptr) {
MS_LOG(ERROR) << "ReadFile return nullptr";
return RET_ERROR;
@ -158,94 +152,12 @@ int NetTrain::ReadInputFile() {
return RET_OK;
}
int NetTrain::CompareOutput() {
std::cout << "================ Comparing Output data ================" << std::endl;
float total_bias = 0;
int total_size = 0;
bool has_error = false;
auto tensors_list = session_->GetOutputs();
if (tensors_list.empty()) {
MS_LOG(ERROR) << "Cannot find output tensors, get model output failed";
return RET_ERROR;
}
mindspore::tensor::MSTensor *tensor = nullptr;
int i = 1;
for (auto it = tensors_list.begin(); it != tensors_list.end(); ++it) {
tensor = session_->GetOutputByTensorName(it->first);
std::cout << "output is tensor " << it->first << "\n";
auto outputs = tensor->MutableData();
size_t size;
std::string output_file = flags_->data_file_ + std::to_string(i) + ".bin";
auto *bin_buf = ReadFileBuf(output_file.c_str(), &size);
if (bin_buf == nullptr) {
MS_LOG(ERROR) << "ReadFile return nullptr";
return RET_ERROR;
}
if (flags_->enable_fp16_ && tensor->data_type() == kNumberTypeFloat16) {
if (static_cast<int>(size / sizeof(float)) != tensor->ElementsNum()) {
MS_LOG(ERROR) << "Output buffer and output file differ by size. Tensor size: " << tensor->Size()
<< ", read size: " << size / sizeof(float);
return RET_ERROR;
}
} else {
if (size != tensor->Size()) {
MS_LOG(ERROR) << "Output buffer and output file differ by size. Tensor size: " << tensor->Size()
<< ", read size: " << size;
return RET_ERROR;
}
}
float bias = 0.f;
if (flags_->enable_fp16_ && tensor->data_type() == kNumberTypeFloat16) {
#ifdef ENABLE_FP16
bias = CompareData<float16_t>(bin_buf, tensor->ElementsNum(), reinterpret_cast<float16_t *>(outputs));
#endif
} else {
bias = CompareData<float>(bin_buf, tensor->ElementsNum(), reinterpret_cast<float *>(outputs));
}
if (bias >= 0) {
total_bias += bias;
total_size++;
} else {
has_error = true;
break;
}
i++;
delete[] bin_buf;
}
if (!has_error) {
float mean_bias;
if (total_size != 0) {
mean_bias = total_bias / total_size * 100;
} else {
mean_bias = 0;
}
std::cout << "Mean bias of all nodes/tensors: " << mean_bias << "%"
<< " threshold is:" << this->flags_->accuracy_threshold_ << std::endl;
std::cout << "=======================================================" << std::endl << std::endl;
if (mean_bias > this->flags_->accuracy_threshold_) {
MS_LOG(ERROR) << "Mean bias of all nodes/tensors is too big: " << mean_bias << "%";
std::cerr << "Mean bias of all nodes/tensors is too big: " << mean_bias << "%" << std::endl;
return RET_ERROR;
} else {
return RET_OK;
}
} else {
MS_LOG(ERROR) << "Error in CompareData";
std::cerr << "Error in CompareData" << std::endl;
std::cout << "=======================================================" << std::endl << std::endl;
return RET_ERROR;
}
}
int NetTrain::CompareOutputLite(const std::unique_ptr<session::LiteSession> &lite_session) {
int NetTrain::CompareOutput(const session::LiteSession &lite_session) {
std::cout << "================ Comparing Forward Output data ================" << std::endl;
float total_bias = 0;
int total_size = 0;
bool has_error = false;
auto tensors_list = lite_session->GetOutputs();
auto tensors_list = lite_session.GetOutputs();
if (tensors_list.empty()) {
MS_LOG(ERROR) << "Cannot find output tensors, get model output failed";
return RET_ERROR;
@ -253,9 +165,9 @@ int NetTrain::CompareOutputLite(const std::unique_ptr<session::LiteSession> &lit
mindspore::tensor::MSTensor *tensor = nullptr;
int i = 1;
for (auto it = tensors_list.begin(); it != tensors_list.end(); ++it) {
tensor = lite_session->GetOutputByTensorName(it->first);
tensor = lite_session.GetOutputByTensorName(it->first);
std::cout << "output is tensor " << it->first << "\n";
auto outputs = tensor->MutableData();
auto outputs = tensor->data();
size_t size;
std::string output_file = flags_->data_file_ + std::to_string(i) + ".bin";
auto *bin_buf = ReadFileBuf(output_file.c_str(), &size);
@ -307,7 +219,7 @@ int NetTrain::CompareOutputLite(const std::unique_ptr<session::LiteSession> &lit
}
}
int NetTrain::MarkPerformance() {
int NetTrain::MarkPerformance(session::TrainSession *session) {
MS_LOG(INFO) << "Running train loops...";
std::cout << "Running train loops..." << std::endl;
uint64_t time_min = 0xFFFFFFFFFFFFFFFF;
@ -315,10 +227,10 @@ int NetTrain::MarkPerformance() {
uint64_t time_avg = 0;
for (int i = 0; i < flags_->epochs_; i++) {
session_->BindThread(true);
session->BindThread(true);
auto start = GetTimeUs();
auto status =
flags_->time_profiling_ ? session_->RunGraph(before_call_back_, after_call_back_) : session_->RunGraph();
flags_->time_profiling_ ? session->RunGraph(before_call_back_, after_call_back_) : session->RunGraph();
if (status != 0) {
MS_LOG(ERROR) << "Inference error " << status;
std::cerr << "Inference error " << status;
@ -330,7 +242,7 @@ int NetTrain::MarkPerformance() {
time_min = std::min(time_min, time);
time_max = std::max(time_max, time);
time_avg += time;
session_->BindThread(false);
session->BindThread(false);
}
if (flags_->time_profiling_) {
@ -352,10 +264,9 @@ int NetTrain::MarkPerformance() {
return RET_OK;
}
int NetTrain::MarkAccuracy() {
int NetTrain::MarkAccuracy(session::LiteSession *session) {
MS_LOG(INFO) << "MarkAccuracy";
std::cout << "MarkAccuracy" << std::endl;
for (auto &msInput : ms_inputs_) {
for (auto &msInput : session->GetInputs()) {
switch (msInput->data_type()) {
case TypeId::kNumberTypeFloat:
PrintInputData<float>(msInput);
@ -371,50 +282,14 @@ int NetTrain::MarkAccuracy() {
return RET_ERROR;
}
}
session_->Eval();
auto status = session_->RunGraph(before_call_back_, after_call_back_);
auto status = session->RunGraph();
if (status != RET_OK) {
MS_LOG(ERROR) << "Inference error " << status;
std::cerr << "Inference error " << status << std::endl;
return status;
}
status = CompareOutput();
if (status != RET_OK) {
MS_LOG(ERROR) << "Compare output error " << status;
std::cerr << "Compare output error " << status << std::endl;
return status;
}
return RET_OK;
}
int NetTrain::MarkAccuracyLite(const std::unique_ptr<session::LiteSession> &lite_session) {
MS_LOG(INFO) << "MarkAccuracy";
std::cout << "MarkAccuracy" << std::endl;
for (auto &msInput : ms_inputs_) {
switch (msInput->data_type()) {
case TypeId::kNumberTypeFloat:
PrintInputData<float>(msInput);
break;
case TypeId::kNumberTypeFloat32:
PrintInputData<float>(msInput);
break;
case TypeId::kNumberTypeInt32:
PrintInputData<int>(msInput);
break;
default:
MS_LOG(ERROR) << "Datatype " << msInput->data_type() << " is not supported.";
return RET_ERROR;
}
}
auto status = lite_session->RunGraph();
if (status != RET_OK) {
MS_LOG(ERROR) << "Inference error " << status;
std::cerr << "Inference error " << status << std::endl;
return status;
}
status = CompareOutputLite(lite_session);
status = CompareOutput(*session);
if (status != RET_OK) {
MS_LOG(ERROR) << "Compare output error " << status;
std::cerr << "Compare output error " << status << std::endl;
@ -423,228 +298,106 @@ int NetTrain::MarkAccuracyLite(const std::unique_ptr<session::LiteSession> &lite
return RET_OK;
}
int NetTrain::RunExportedNet() {
static CpuBindMode FlagToBindMode(int flag) {
if (flag == 2) {
return MID_CPU;
}
if (flag == 1) {
return HIGHER_CPU;
}
return NO_BIND;
}
int NetTrain::CreateAndRunNetwork(const std::string &filename, int train_session, int epochs) {
auto start_prepare_time = GetTimeUs();
// Load graph
std::string model_name = flags_->export_file_.substr(flags_->export_file_.find_last_of(DELIM_SLASH) + 1);
std::string model_name = filename.substr(filename.find_last_of(DELIM_SLASH) + 1);
Context context;
context.device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = FlagToBindMode(flags_->cpu_bind_mode_);
context.device_list_[0].device_info_.cpu_device_info_.enable_float16_ = flags_->enable_fp16_;
context.device_list_[0].device_type_ = mindspore::lite::DT_CPU;
context.thread_num_ = flags_->num_threads_;
MS_LOG(INFO) << "start reading exported model file";
std::cout << "start reading exported model file" << std::endl;
auto context = std::make_shared<Context>();
if (context == nullptr) {
MS_LOG(ERROR) << "New context failed while running " << model_name.c_str();
std::cerr << "New context failed while running " << model_name.c_str() << std::endl;
return RET_ERROR;
}
if (flags_->cpu_bind_mode_ == 2) {
context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = MID_CPU;
} else if (flags_->cpu_bind_mode_ == 1) {
context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = HIGHER_CPU;
} else {
context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = NO_BIND;
}
context->thread_num_ = flags_->num_threads_;
auto *model = mindspore::lite::Model::Import(flags_->export_file_.c_str());
MS_LOG(INFO) << "start reading model file" << filename.c_str();
std::cout << "start reading model file " << filename.c_str() << std::endl;
auto *model = mindspore::lite::Model::Import(filename.c_str());
if (model == nullptr) {
MS_LOG(ERROR) << "create model for train session failed";
return RET_ERROR;
}
session_ = session::TrainSession::CreateSession(model, context.get());
if (session_ == nullptr) {
MS_LOG(ERROR) << "ExportedFile CreateSession failed while running " << model_name.c_str();
std::cout << "CreateSession failed while running " << model_name.c_str() << std::endl;
return RET_ERROR;
}
if (flags_->loss_name_ != "") {
session_->SetLossName(flags_->loss_name_);
}
ms_inputs_ = session_->GetInputs();
auto end_prepare_time = GetTimeUs();
MS_LOG(INFO) << "Exported model PrepareTime = " << (end_prepare_time - start_prepare_time) / 1000 << " ms";
std::cout << "Exported model PrepareTime = " << (end_prepare_time - start_prepare_time) / 1000 << " ms" << std::endl;
// Load input
MS_LOG(INFO) << "start generate input data";
auto status = LoadInput();
if (status != RET_OK) {
MS_LOG(ERROR) << "Generate input data error";
return status;
}
if (!flags_->data_file_.empty()) {
MS_LOG(INFO) << "Check accuracy for exported model";
std::cout << "Check accuracy for exported model " << std::endl;
status = MarkAccuracy();
for (auto &data : data_) {
data.second->shape.clear();
data.second->data.clear();
delete data.second;
}
data_.clear();
if (status != RET_OK) {
MS_LOG(ERROR) << "Run MarkAccuracy on exported model error: " << status;
std::cout << "Run MarkAccuracy on exported model error: " << status << std::endl;
return status;
}
}
return RET_OK;
}
int NetTrain::RunExportedNetLite(std::string file_name) {
auto start_prepare_time = GetTimeUs();
// Load graph
std::string model_name = file_name.substr(file_name.find_last_of(DELIM_SLASH) + 1);
MS_LOG(INFO) << "start reading exported model file";
std::cout << "reading " << file_name << std::endl;
auto context = std::make_shared<Context>();
if (context == nullptr) {
MS_LOG(ERROR) << "New context failed while running " << model_name.c_str();
std::cerr << "New context failed while running " << model_name.c_str() << std::endl;
return RET_ERROR;
}
if (flags_->cpu_bind_mode_ == 2) {
context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = MID_CPU;
} else if (flags_->cpu_bind_mode_ == 1) {
context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = HIGHER_CPU;
} else {
context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = NO_BIND;
}
context->thread_num_ = flags_->num_threads_;
auto *model = mindspore::lite::Model::Import(file_name.c_str());
if (model == nullptr) {
MS_LOG(ERROR) << "create model for lite session failed";
return RET_ERROR;
}
auto lite_session = std::unique_ptr<session::LiteSession>(session::LiteSession::CreateSession(context.get()));
if (lite_session == nullptr) {
MS_LOG(ERROR) << "ExportedFile CreateSession failed while running " << model_name.c_str();
std::cout << "CreateSession failed while running " << model_name.c_str() << std::endl;
return RET_ERROR;
}
if (lite_session->CompileGraph(model) != RET_OK) {
MS_LOG(ERROR) << "Cannot compile model";
delete model;
return RET_ERROR;
}
ms_inputs_ = lite_session->GetInputs();
auto end_prepare_time = GetTimeUs();
MS_LOG(INFO) << "Exported model PrepareTime = " << (end_prepare_time - start_prepare_time) / 1000 << " ms";
std::cout << "Exported model PrepareTime = " << (end_prepare_time - start_prepare_time) / 1000 << " ms" << std::endl;
// Load input
MS_LOG(INFO) << "start generate input data";
auto status = LoadInput();
if (status != RET_OK) {
MS_LOG(ERROR) << "Generate input data error";
delete model;
return status;
}
if (!flags_->data_file_.empty()) {
MS_LOG(INFO) << "Check accuracy for exported model";
std::cout << "Check accuracy for exported model " << std::endl;
status = MarkAccuracyLite(lite_session);
for (auto &data : data_) {
data.second->shape.clear();
data.second->data.clear();
delete data.second;
}
data_.clear();
if (status != RET_OK) {
MS_LOG(ERROR) << "Run MarkAccuracy on exported model error: " << status;
std::cout << "Run MarkAccuracy on exported model error: " << status << std::endl;
session::LiteSession *session = nullptr;
session::TrainSession *t_session = nullptr;
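// Benchmarking training requires a TrainSession; checking an exported inference model
// only needs a plain LiteSession.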
if (train_session) {
t_session = session::TrainSession::CreateSession(model, &context);
if (t_session == nullptr) {
MS_LOG(ERROR) << "RunNetTrain CreateSession failed while running " << model_name.c_str();
std::cout << "RunNetTrain CreateSession failed while running " << model_name.c_str() << std::endl;
delete model;
return status;
return RET_ERROR;
}
}
delete model;
return RET_OK;
}
int NetTrain::RunNetTrain() {
auto start_prepare_time = GetTimeUs();
// Load graph
std::string model_name = flags_->model_file_.substr(flags_->model_file_.find_last_of(DELIM_SLASH) + 1);
MS_LOG(INFO) << "start reading model file";
std::cout << "start reading model file" << std::endl;
auto context = std::make_shared<Context>();
if (context == nullptr) {
MS_LOG(ERROR) << "New context failed while running " << model_name.c_str();
std::cerr << "New context failed while running " << model_name.c_str() << std::endl;
return RET_ERROR;
}
if (flags_->cpu_bind_mode_ == 2) {
context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = MID_CPU;
} else if (flags_->cpu_bind_mode_ == 1) {
context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = HIGHER_CPU;
if (flags_->loss_name_ != "") {
t_session->SetLossName(flags_->loss_name_);
}
if (epochs > 0) {
t_session->Train();
}
session = t_session;
} else {
context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = NO_BIND;
}
context->device_list_[0].device_info_.cpu_device_info_.enable_float16_ = flags_->enable_fp16_;
layer_checksum_ = flags_->layer_checksum_;
context->thread_num_ = flags_->num_threads_;
auto *model = mindspore::lite::Model::Import(flags_->model_file_.c_str());
if (model == nullptr) {
MS_LOG(ERROR) << "create model for train session failed";
return RET_ERROR;
}
session_ = session::TrainSession::CreateSession(model, context.get());
if (session_ == nullptr) {
MS_LOG(ERROR) << "RunNetTrain CreateSession failed while running " << model_name.c_str();
std::cout << "RunNetTrain CreateSession failed while running " << model_name.c_str() << std::endl;
return RET_ERROR;
session = session::LiteSession::CreateSession(&context);
if (session == nullptr) {
MS_LOG(ERROR) << "ExportedFile CreateSession failed while running " << model_name.c_str();
std::cout << "CreateSession failed while running " << model_name.c_str() << std::endl;
delete model;
return RET_ERROR;
}
if (session->CompileGraph(model) != RET_OK) {
MS_LOG(ERROR) << "Cannot compile model";
delete model;
return RET_ERROR;
}
delete model;
}
if (flags_->loss_name_ != "") {
session_->SetLossName(flags_->loss_name_);
}
session_->Train();
ms_inputs_ = session_->GetInputs();
auto end_prepare_time = GetTimeUs();
MS_LOG(INFO) << "PrepareTime = " << (end_prepare_time - start_prepare_time) / 1000 << " ms";
std::cout << "PrepareTime = " << (end_prepare_time - start_prepare_time) / 1000 << " ms" << std::endl;
// Load input
MS_LOG(INFO) << "start generate input data";
auto status = LoadInput();
MS_LOG(INFO) << "Load input data";
auto ms_inputs = session->GetInputs();
auto status = LoadInput(&ms_inputs);
if (status != RET_OK) {
MS_LOG(ERROR) << "Generate input data error";
MS_LOG(ERROR) << "Load input data error";
return status;
}
if (flags_->epochs_ > 0) {
status = MarkPerformance();
if ((epochs > 0) && (t_session != nullptr)) {
status = MarkPerformance(t_session);
if (status != RET_OK) {
MS_LOG(ERROR) << "Run MarkPerformance error: " << status;
std::cout << "Run MarkPerformance error: " << status << std::endl;
return status;
}
SaveModels(t_session, model); // save file if flags are on
}
if (!flags_->data_file_.empty()) {
status = MarkAccuracy();
for (auto &data : data_) {
data.second->shape.clear();
data.second->data.clear();
delete data.second;
if (t_session != nullptr) {
t_session->Eval();
}
data_.clear();
status = MarkAccuracy(session);
if (status != RET_OK) {
MS_LOG(ERROR) << "Run MarkAccuracy error: " << status;
std::cout << "Run MarkAccuracy error: " << status << std::endl;
return status;
}
}
status = CheckExecute(model);
return RET_OK;
}
int NetTrain::RunNetTrain() {
CreateAndRunNetwork(flags_->model_file_, true, flags_->epochs_);
auto status = CheckExecutionOfSavedModels(); // re-initialize sessions according to flags
if (status != RET_OK) {
MS_LOG(ERROR) << "Run CheckExecute error: " << status;
std::cout << "Run CheckExecute error: " << status << std::endl;
@ -653,8 +406,7 @@ int NetTrain::RunNetTrain() {
return RET_OK;
}
int NetTrain::CheckExecute(mindspore::lite::Model *model) {
int status;
int NetTrain::SaveModels(session::TrainSession *session, mindspore::lite::Model *model) {
if (!flags_->export_file_.empty()) {
auto ret = Model::Export(model, flags_->export_file_.c_str());
if (ret != RET_OK) {
@ -662,67 +414,39 @@ int NetTrain::CheckExecute(mindspore::lite::Model *model) {
std::cout << "Run SaveToFile error";
return RET_ERROR;
}
delete session_;
session_ = nullptr;
status = RunExportedNet();
}
if (!flags_->inference_file_.empty()) {
auto tick = GetTimeUs();
auto status = session->ExportInference(flags_->inference_file_);
if (status != RET_OK) {
MS_LOG(ERROR) << "Run Exported model error: " << status;
std::cout << "Run Exported model error: " << status << std::endl;
MS_LOG(ERROR) << "Save model error: " << status;
std::cout << "Save model error: " << status << std::endl;
return status;
}
} else {
if (!flags_->inference_file_.empty()) {
auto tick = GetTimeUs();
status = session_->ExportInference(flags_->inference_file_);
if (status != RET_OK) {
MS_LOG(ERROR) << "Save model error: " << status;
std::cout << "Save model error: " << status << std::endl;
return status;
}
std::cout << "ExportInference() execution time is " << GetTimeUs() - tick << "us\n";
delete session_;
session_ = nullptr;
status = RunExportedNetLite(flags_->inference_file_ + ".ms");
if (status != RET_OK) {
MS_LOG(ERROR) << "Running saved model error: " << status;
std::cout << "Running saved model error: " << status << std::endl;
return status;
}
}
std::cout << "ExportInference() execution time is " << GetTimeUs() - tick << "us\n";
}
return RET_OK;
}
void NetTrainFlags::InitInputDataList() {
char *saveptr1 = nullptr;
char *input_list = new char[this->in_data_file_.length() + 1];
snprintf(input_list, this->in_data_file_.length() + 1, "%s", this->in_data_file_.c_str());
const char *split_c = ",";
char *cur_input = strtok_r(input_list, split_c, &saveptr1);
while (cur_input != nullptr) {
input_data_list_.emplace_back(cur_input);
cur_input = strtok_r(nullptr, split_c, &saveptr1);
}
delete[] input_list;
}
void NetTrainFlags::InitResizeDimsList() {
std::string content;
content = this->resize_dims_in_;
std::vector<int64_t> shape;
auto shape_strs = StringSplit(content, std::string(DELIM_COLON));
for (const auto &shape_str : shape_strs) {
shape.clear();
auto dim_strs = StringSplit(shape_str, std::string(DELIM_COMMA));
std::cout << "Resize Dims: ";
for (const auto &dim_str : dim_strs) {
std::cout << dim_str << " ";
shape.emplace_back(static_cast<int64_t>(std::stoi(dim_str)));
int NetTrain::CheckExecutionOfSavedModels() {
int status = RET_OK;
if (!flags_->export_file_.empty()) {
status = NetTrain::CreateAndRunNetwork(flags_->export_file_, true, 0);
if (status != RET_OK) {
MS_LOG(ERROR) << "Run Exported model " << flags_->export_file_ << " error: " << status;
std::cout << "Run Exported model " << flags_->export_file_ << " error: " << status << std::endl;
return status;
}
std::cout << std::endl;
this->resize_dims_.emplace_back(shape);
}
if (!flags_->inference_file_.empty()) {
status = NetTrain::CreateAndRunNetwork(flags_->inference_file_ + ".ms", false, 0);
if (status != RET_OK) {
MS_LOG(ERROR) << "Running saved model " << flags_->inference_file_ << ".ms error: " << status;
std::cout << "Running saved model " << flags_->inference_file_ << ".ms error: " << status << std::endl;
return status;
}
}
return status;
}
int NetTrain::InitCallbackParameter() {
@ -766,7 +490,7 @@ int NetTrain::InitCallbackParameter() {
op_times_by_type_[call_param.node_type].second += cost;
op_times_by_name_[call_param.node_name].first++;
op_times_by_name_[call_param.node_name].second += cost;
if (layer_checksum_) {
if (flags_->layer_checksum_) {
auto out_tensor = after_outputs.at(0);
void *output = out_tensor->MutableData();
int tensor_size = out_tensor->ElementsNum();
@ -841,13 +565,6 @@ int NetTrain::Init() {
std::cerr << "modelPath is required" << std::endl;
return 1;
}
flags_->InitInputDataList();
flags_->InitResizeDimsList();
if (!flags_->resize_dims_.empty() && flags_->resize_dims_.size() != flags_->input_data_list_.size()) {
MS_LOG(ERROR) << "Size of input resizeDims should be equal to size of input inDataPath";
std::cerr << "Size of input resizeDims should be equal to size of input inDataPath" << std::endl;
return RET_ERROR;
}
if (flags_->time_profiling_) {
auto status = InitCallbackParameter();
@ -925,14 +642,6 @@ int NetTrain::PrintResult(const std::vector<std::string> &title,
return RET_OK;
}
NetTrain::~NetTrain() {
for (auto iter : this->data_) {
delete (iter.second);
}
this->data_.clear();
if (session_ != nullptr) delete (session_);
}
int RunNetTrain(int argc, const char **argv) {
NetTrainFlags flags;
Option<std::string> err = flags.ParseFlags(argc, argv);

View File

@ -42,15 +42,6 @@ enum MS_API DataType { kImage = 0, kBinary = 1 };
constexpr float relativeTolerance = 1e-5;
constexpr float absoluteTolerance = 1e-8;
struct MS_API CheckTensor {
CheckTensor(const std::vector<size_t> &shape, const std::vector<float> &data) {
this->shape = shape;
this->data = data;
}
std::vector<size_t> shape;
std::vector<float> data;
};
template <typename T>
float TensorSum(void *data, int size) {
T *typed_data = reinterpret_cast<T *>(data);
@ -84,10 +75,6 @@ class MS_API NetTrainFlags : public virtual FlagParser {
~NetTrainFlags() override = default;
void InitInputDataList();
void InitResizeDimsList();
public:
// common
std::string model_file_;
@ -118,25 +105,22 @@ class MS_API NetTrainFlags : public virtual FlagParser {
class MS_API NetTrain {
public:
explicit NetTrain(NetTrainFlags *flags) : flags_(flags) {}
virtual ~NetTrain();
virtual ~NetTrain() = default;
int Init();
int RunNetTrain();
int RunExportedNet();
private:
// call GenerateInputData or ReadInputFile to init inputTensors
int LoadInput();
int LoadInput(Vector<tensor::MSTensor *> *ms_inputs);
// call GenerateRandomData to fill inputTensors
int GenerateInputData();
int GenerateInputData(std::vector<mindspore::tensor::MSTensor *> *ms_inputs);
int GenerateRandomData(size_t size, void *data);
int ReadInputFile();
int CompareOutput();
int ReadInputFile(std::vector<mindspore::tensor::MSTensor *> *ms_inputs);
int CreateAndRunNetwork(const std::string &filename, int train_session, int epochs);
int InitCallbackParameter();
@ -208,22 +192,13 @@ class MS_API NetTrain {
return meanError;
}
int MarkPerformance();
int MarkPerformance(session::TrainSession *session);
int MarkAccuracy();
private:
int RunExportedNetLite(std::string file_name);
int MarkAccuracyLite(const std::unique_ptr<session::LiteSession> &lite_session);
int CompareOutputLite(const std::unique_ptr<session::LiteSession> &lite_session);
int CheckExecute(mindspore::lite::Model *model);
int MarkAccuracy(session::LiteSession *lite_session);
int CompareOutput(const session::LiteSession &lite_session);
int SaveModels(session::TrainSession *session, mindspore::lite::Model *model);
int CheckExecutionOfSavedModels();
NetTrainFlags *flags_;
session::TrainSession *session_ = nullptr;
std::vector<mindspore::tensor::MSTensor *> ms_inputs_;
std::unordered_map<std::string, std::vector<mindspore::tensor::MSTensor *>> ms_outputs_;
std::unordered_map<std::string, CheckTensor *> data_;
std::unordered_map<std::string, TypeId> data_type_map_{{"FLOAT", TypeId::kNumberTypeFloat},
{"INT32", TypeId::kNumberTypeInt32}};
// callback parameters
uint64_t op_begin_ = 0;
@ -234,7 +209,6 @@ class MS_API NetTrain {
mindspore::KernelCallBack before_call_back_;
mindspore::KernelCallBack after_call_back_;
bool layer_checksum_ = false;
};
int MS_API RunNetTrain(int argc, const char **argv);

View File

@ -143,7 +143,7 @@ bool IndexingCompress(const std::set<T> &quant_data_set, const std::map<T, size_
tensor->data.resize(new_data_str.size());
tensor->weightQunatCompressType = schema::WeightQunatCompressType_INDEXING;
MS_LOG(ERROR) << "set WeightQunatCompressType_INDEXING";
MS_LOG(DEBUG) << "set WeightQunatCompressType_INDEXING";
return true;
}
@ -285,21 +285,21 @@ bool PackRepetition(size_t bit_num, schema::TensorT *tensor) {
auto pack_sparsity_size_in_bit =
1 * 8 + 4 * 8 + bit_num + bit_num * unique_value_cnt + unique_value_bit * nz_cnt + nz_cnt * coor_best_bit;
size_t pack_sparsity_size_in_byte = ceil(pack_sparsity_size_in_bit / 8.0);
MS_LOG(ERROR) << "coor_best_bit: " << coor_best_bit << " ori: " << origin_size_in_byte
MS_LOG(DEBUG) << "coor_best_bit: " << coor_best_bit << " ori: " << origin_size_in_byte
<< " indexing: " << pack_repetition_size_in_byte << " sparse: " << pack_sparsity_size_in_byte;
auto min_byte_need = std::min({origin_size_in_byte, pack_repetition_size_in_byte, pack_sparsity_size_in_byte});
if (min_byte_need == origin_size_in_byte) {
return false;
} else if (min_byte_need == pack_repetition_size_in_byte) {
MS_LOG(ERROR) << "from " << origin_size_in_byte << " to " << pack_repetition_size_in_byte;
MS_LOG(DEBUG) << "from " << origin_size_in_byte << " to " << pack_repetition_size_in_byte;
return IndexingCompress<T>(quant_data_set, unique_value_index_map, unique_value_bit, unique_value_cnt,
pack_repetition_size_in_byte, bit_num, tensor);
} else if (min_byte_need == pack_sparsity_size_in_byte) {
MS_LOG(ERROR) << "from " << origin_size_in_byte << " to " << pack_sparsity_size_in_byte;
MS_LOG(DEBUG) << "from " << origin_size_in_byte << " to " << pack_sparsity_size_in_byte;
return SparsityCompress<T>(quant_data_set, unique_value_index_map, unique_value_bit, unique_value_cnt,
pack_sparsity_size_in_byte, nz_cnt, coor_best_bit, bit_num, tensor);
} else {
MS_LOG(ERROR) << "unexpected: " << min_byte_need << " not in {" << origin_size_in_byte << " "
MS_LOG(DEBUG) << "unexpected: " << min_byte_need << " not in {" << origin_size_in_byte << " "
<< pack_repetition_size_in_byte << " " << pack_sparsity_size_in_byte << "}";
}
return false;

View File

@ -22,6 +22,7 @@ file(GLOB_RECURSE CONVERTER_SRC RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/graphdef_transform.cc
${CMAKE_CURRENT_SOURCE_DIR}/optimizer.cc
${CMAKE_CURRENT_SOURCE_DIR}/../../src/common/file_utils.cc
${CMAKE_CURRENT_SOURCE_DIR}/../../src/common/quant_utils.cc
${CMAKE_CURRENT_SOURCE_DIR}/../common/graph_util.cc
${CMAKE_CURRENT_SOURCE_DIR}/../common/node_util.cc
${CMAKE_CURRENT_SOURCE_DIR}/../common/tensor_util.cc

View File

@ -14,14 +14,16 @@
* limitations under the License.
*/
#include "tools/converter/legacy_optimizer/graph/tensor_quant_pass.h"
#include <vector>
#include <cmath>
#include "tools/converter/legacy_optimizer/graph/tensor_quant_pass.h"
#include <algorithm>
#include "tools/converter/converter_context.h"
#include "tools/converter/quantizer/quantize_util.h"
#include "tools/common/tensor_util.h"
#include "tools/common/graph_util.h"
#include "tools/common/node_util.h"
#include "src/common/quant_utils.h"
namespace mindspore::lite {
namespace {
@ -49,7 +51,7 @@ STATUS ComputeDataToInt8(const std::unique_ptr<TensorT> &tensor, int32_t index)
return RET_OK;
}
for (size_t j = 0; j < wShapeSize; j++) {
qDatas[j] = quant::QuantizeData<int8_t>(weightData[j], weightQauntParam.get());
qDatas[j] = QuantizeData<int8_t>(weightData[j], weightQauntParam.get());
}
} else { // convert uint8 to int8
auto *weightData = static_cast<uint8_t *>(oriWeightData);
@ -141,7 +143,7 @@ STATUS ComputeQuantTensorPerChannel(TensorT *tensor, const int &tensor_index, co
auto *dst_data_int32 = reinterpret_cast<int32_t *>(dst_data.data());
dst_data_int32[index] = quant_data;
} else {
auto quant_data = quant::QuantizeData<int8_t>(raw_data, tensor->quantParams.at(i).get());
auto quant_data = QuantizeData<int8_t>(raw_data, tensor->quantParams.at(i).get());
dst_data[index] = quant_data;
}
}

View File

@ -44,6 +44,7 @@
#include "securec/include/securec.h"
#include "tools/common/tensor_util.h"
#include "src/common/file_utils.h"
#include "src/common/quant_utils.h"
#include "src/common/utils.h"
#include "tools/converter/quantizer/weight_quantizer.h"
@ -1282,8 +1283,7 @@ STATUS PostTrainingQuantizer::DoQuantize(FuncGraphPtr func_graph) {
return status;
}
if (calibrator_->config_param_.mixed) {
// get opname_bit map
if (calibrator_->config_param_.mixed) { // get opname_bit map
auto weight_quant_func_graph = CopyFuncGraph(func_graph);
if (weight_quant_func_graph == nullptr) {
MS_LOG(ERROR) << "CopyFuncGraph error";
@ -1315,7 +1315,6 @@ STATUS PostTrainingQuantizer::DoQuantize(FuncGraphPtr func_graph) {
MS_LOG(ERROR) << "create session failed!";
return RET_ERROR;
}
MS_LOG(INFO) << "start to update divergence's max value";
status = DoInference();
if (status != RET_OK) {
@ -1363,14 +1362,12 @@ STATUS PostTrainingQuantizer::DoQuantize(FuncGraphPtr func_graph) {
MS_LOG(ERROR) << "create session failed!";
return RET_ERROR;
}
MS_LOG(INFO) << "do bias correction";
status = BiasCorrection(func_graph);
if (status != RET_OK) {
MS_LOG(WARNING) << "BiasCorrection failed.";
}
}
return RET_OK;
}
@ -1477,7 +1474,7 @@ KernelCallBack PostTrainingQuantizer::GetBeforeCallBack(bool int8_op) {
quant_param_t.scale = quant_params[0].scale;
quant_param_t.zeroPoint = quant_params[0].zeroPoint;
for (auto float_data : fp32_op_input) {
auto quant_data = QuantizeData<int8_t>(float_data, &quant_param_t, quant_max, quant_min);
quant_datas.push_back(quant_data);
}

View File

@ -100,12 +100,12 @@ bool QuantStrategy::CanConvOpQuantized(const CNodePtr &node) const {
return true;
}
bool QuantStrategy::CanOpPostQuantized(const AnfNodePtr &node) const {
MS_ASSERT(node != nullptr);
if (!node->isa<mindspore::CNode>()) {
return false;
}
const auto cnode = std::dynamic_pointer_cast<mindspore::CNode>(node);
auto type = NodePrimitiveType(cnode);
static const std::vector<std::string> int8OpList = {
ops::kNameAddFusion, ops::kNameActivation, ops::kNameAvgPoolFusion,
@ -268,67 +268,6 @@ bool TensorQuantParamsInited(const schema::TensorT &tensor) {
return true;
}
STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange, int quant_max,
int quant_min, int num_bits) {
MS_ASSERT(quantParam != nullptr);
if (mMin > 0.0f) {
MS_LOG(DEBUG) << "min " << mMin << " is bigger then 0, set to 0, this may course low precision";
mMin = 0.0f;
}
if (mMax < 0.0f) {
MS_LOG(DEBUG) << "mMax " << mMax << " is smaller than 0, set to 0, this may course low precision";
mMax = 0.0f;
}
if (mMin > mMax) {
MS_LOG(ERROR) << "cal error while min" << mMin << ">" << mMax;
return RET_PARAM_INVALID;
}
if (mMin == mMax) {
if (mMin != 0.0f) {
MS_LOG(ERROR) << "min and max should both be zero if they are equal to each other";
return RET_ERROR;
}
quantParam->inited = true;
quantParam->min = mMin;
quantParam->max = mMax;
quantParam->scale = 0.0f;
quantParam->zeroPoint = 0;
quantParam->narrowRange = narrowRange;
quantParam->numBits = num_bits;
return RET_OK;
}
auto quantMinFloat = static_cast<double>(quant_min);
auto quantMaxFloat = static_cast<double>(quant_max);
if (fabs(quantMaxFloat - quantMinFloat) <= 0.0f) {
MS_LOG(ERROR) << "divisor cannot be 0";
return RET_ERROR;
}
double scale = (mMax - mMin) / (quantMaxFloat - quantMinFloat);
if (fabs(scale) <= 0.0f) {
MS_LOG(ERROR) << "divisor 'scale' cannot be 0";
return RET_ERROR;
}
const double zeroPointFromMin = quantMinFloat - mMin / scale;
int zeroPoint = static_cast<int32_t>(std::round(zeroPointFromMin));
if (scale < SCALE_THREASHOLD) {
zeroPoint = 0;
}
// The zero point should always be in the range of quantized value,
// [qmin, qmax].
MS_ASSERT(zeroPoint >= quant_min);
MS_ASSERT(zeroPoint <= quant_max);
quantParam->inited = true;
quantParam->min = mMin;
quantParam->max = mMax;
quantParam->scale = scale;
quantParam->zeroPoint = zeroPoint;
quantParam->narrowRange = narrowRange;
quantParam->numBits = num_bits;
return RET_OK;
}
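
A worked example of the scale and zero-point math above, assuming an int8 range of [-128, 127]; the float range [-1.0, 3.0] is illustrative only:

#include <cmath>
#include <iostream>

int main() {
  const double m_min = -1.0, m_max = 3.0;
  const int quant_min = -128, quant_max = 127;
  // scale = (max - min) / (qmax - qmin) = 4 / 255 ~= 0.0157
  const double scale = (m_max - m_min) / (static_cast<double>(quant_max) - quant_min);
  // zero point = round(qmin - min / scale) = round(-128 + 63.75) = -64
  const int zero_point = static_cast<int>(std::round(quant_min - m_min / scale));
  std::cout << "scale=" << scale << " zeroPoint=" << zero_point << std::endl;
  return 0;
}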
STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange, int numBits) {
MS_ASSERT(quantParam != nullptr);
if (mMin > 0.0f) {
@ -999,26 +938,6 @@ STATUS UpdateTensorDataAndSize(const tensor::TensorPtr &weight, void *quant_data
return RET_OK;
}
void GetMaxMinPerchannel(int channels, int one_filter_size, int i, int elem_count, const float *raw_datas,
bool channel_at_first, float *desired_max, float *desired_min) {
float min = FLT_MAX;
float max = -FLT_MAX;
// find min and max
for (int j = 0; j < one_filter_size; j++) {
auto index = j + i * one_filter_size;
if (!channel_at_first) {
index = j * channels + i;
}
if (index >= elem_count) {
MS_LOG(ERROR) << "over flow!";
}
min = std::min(min, raw_datas[index]);
max = std::max(max, raw_datas[index]);
}
*desired_max = max;
*desired_min = min;
}
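
The index arithmetic above can be restated in a single hypothetical helper (not part of the converter): with the channel as the leading dimension a filter is one contiguous block, otherwise elements of the same channel are strided by the channel count.

#include <cstddef>

std::size_t ElementIndex(bool channel_at_first, int channels, int one_filter_size,
                         int channel, int j) {
  return channel_at_first ? static_cast<std::size_t>(channel * one_filter_size + j)
                          : static_cast<std::size_t>(j * channels + channel);
}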
int CalChannels(const ShapeVector &dims, int channel_cnt, bool *channel_at_first) {
auto channels = dims[0];
if (!(*channel_at_first)) {

View File

@ -43,6 +43,7 @@
#include "src/lite_session.h"
#include "tools/converter/graphdef_transform.h"
#include "src/common/file_utils.h"
#include "src/common/quant_utils.h"
namespace mindspore::lite::quant {
static constexpr size_t UINT8_QUANTIZATION = 8;
@ -82,7 +83,7 @@ class QuantStrategy {
bool CanConvOpQuantized(const CNodePtr &node) const;
bool CanMulOpQuantized(const CNodePtr &node) const;
bool CanOpPostQuantized(const AnfNodePtr &node) const;
bool CanTensorQuantized(const AnfNodePtr &inputNode) const;
size_t m_weight_size_;
@ -100,9 +101,6 @@ constexpr int quant_param_size = 32 * 8;
QuantParamHolderPtr GetCNodeQuantHolder(const PrimitivePtr &primitive);
STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange, int quant_max,
int quant_min, int num_bits);
STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange = false,
int numBits = UINT8_QUANTIZATION);
@ -112,9 +110,6 @@ std::vector<int8_t> KMeans(float *data, size_t elem_count, size_t k, size_t epoc
STATUS UpdateTensorDataAndSize(const tensor::TensorPtr &weight, void *quant_datas, int new_size, TypeId new_data_type);
void GetMaxMinPerchannel(int channels, int one_filter_size, int i, int elem_count, const float *raw_datas,
bool channel_at_first, float *desired_max, float *desired_min);
int CalChannels(const ShapeVector &dims, int channel_cnt, bool *channel_at_first);
void CalQuantAssitInfo(const PrimitivePtr &primitive, const ShapeVector &shapes, int index, bool *channel_at_first,
@ -123,193 +118,10 @@ void CalQuantAssitInfo(const PrimitivePtr &primitive, const ShapeVector &shapes,
void CalQuantAssitInfo(const schema::PrimitiveT &primitive, const std::vector<int> &shapes, int index,
bool *channel_at_first, int *channel_cnt);
template <typename T>
T QuantizeData(const float originData, const schema::QuantParamT *quantParam) {
MS_ASSERT(quantParam != nullptr);
MS_ASSERT(quantParam->inited);
const auto scale = quantParam->scale;
const auto zeroPoint = quantParam->zeroPoint;
const auto numBit = quantParam->numBits;
const auto narrowRange = quantParam->narrowRange;
double maxLimitTemp = static_cast<float>((1 << (unsigned int)numBit) - 1);
const double maxLimit = static_cast<float>(maxLimitTemp - zeroPoint + std::numeric_limits<T>::min()) * scale;
double minLimit;
if (narrowRange) {
minLimit = static_cast<float>(std::numeric_limits<T>::min() + 1 - zeroPoint) * scale;
} else {
minLimit = static_cast<float>(std::numeric_limits<T>::min() - zeroPoint) * scale;
}
return [maxLimit, minLimit, zeroPoint, scale, narrowRange, originData] {
double tmp;
if (originData > maxLimit) {
tmp = maxLimit;
} else if (originData < minLimit) {
tmp = minLimit;
} else {
tmp = originData;
}
auto quantData = static_cast<T>(std::round(zeroPoint + tmp / scale));
return quantData;
}();
}
template <typename T>
T QuantizeData(float originData, const schema::QuantParamT &quantParam, int quant_max, int quant_min) {
MS_ASSERT(quantParam.inited);
const auto scale = quantParam.scale;
const int zeroPoint = quantParam.zeroPoint;
const auto narrowRange = quantParam.narrowRange;
const int maxLimit = quant_max;
const int minLimit = quant_min;
if (scale <= SCALE_THREASHOLD) {
return 0;
}
return [maxLimit, minLimit, zeroPoint, scale, narrowRange, originData] {
auto quant_data = std::round(originData / scale + zeroPoint);
if (quant_data > maxLimit) {
quant_data = maxLimit;
} else if (quant_data < minLimit) {
quant_data = minLimit;
}
return static_cast<T>(quant_data);
}();
}
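
A self-contained sketch of the clamp-then-round behaviour of the QuantizeData template above, with plain arguments instead of schema::QuantParamT (illustrative only, not the converter's API):

#include <algorithm>
#include <cmath>
#include <cstdint>

int8_t QuantizeOne(float value, double scale, int zero_point,
                   int quant_min = -128, int quant_max = 127) {
  if (scale <= 0.0) {
    return 0;  // mirrors the SCALE_THREASHOLD guard above
  }
  double q = std::round(value / scale + zero_point);
  q = std::min<double>(std::max<double>(q, quant_min), quant_max);  // clamp to [qmin, qmax]
  return static_cast<int8_t>(q);
}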
bool QuantParamEqual(const schema::QuantParamT &quant_param1, const schema::QuantParamT &quant_param2);
bool TensorQuantParamsInited(const schema::TensorT &tensor);
template <typename T>
STATUS DoPerChannelQuant(const tensor::TensorPtr &weight, const QuantType &quant_type,
std::vector<schema::QuantParamT> *quant_params, const int &quant_max, const int &quant_min,
const size_t &bit_num, const bool &k_means, std::vector<T> *quant_datas,
std::vector<float> *dequant_datas, TypeId quant_data_type, bool channel_at_first = true,
int channel_cnt = -1) {
auto dims = weight->shape();
size_t elem_count = weight->DataSize();
auto *raw_datas = static_cast<float *>(weight->data_c());
auto channels = CalChannels(dims, channel_cnt, &channel_at_first);
if (channels == 0) {
MS_LOG(ERROR) << "channels is zero";
return RET_ERROR;
}
size_t one_filter_size = elem_count / channels;
bool do_quant = quant_param_size / (sizeof(float) * 8 - bit_num) < one_filter_size;
if (!do_quant && quant_type == QuantType_WeightQuant) {
MS_LOG(INFO) << "too few elements in a filter, no need to quantize. " << one_filter_size;
return RET_CONTINUE;
}
for (int i = 0; i < channels; i++) {
float min = FLT_MAX;
float max = -FLT_MAX;
GetMaxMinPerchannel(channels, one_filter_size, i, elem_count, raw_datas, channel_at_first, &max, &min);
schema::QuantParamT quant_param;
STATUS status = CalQuantizationParams(&quant_param, min, max, false, quant_max, quant_min, bit_num);
if (status != RET_OK) {
MS_LOG(ERROR) << "CalQuantizationParams failed" << status;
return status;
}
// do quantization
double average_dequant = 0;
double average_raw = 0;
for (uint32_t j = 0; j < one_filter_size; j++) {
auto index = j + i * one_filter_size;
if (!channel_at_first) {
index = j * channels + i;
}
MS_ASSERT(index < elem_count);
float raw_data = raw_datas[index];
auto quant_data = QuantizeData<T>(raw_data, quant_param, quant_max, quant_min);
(*quant_datas)[index] = quant_data;
if (quant_type == QuantType_WeightQuant) {
float dequant_data = quant_param.scale * (quant_data - quant_param.zeroPoint);
(*dequant_datas)[index] = dequant_data;
average_dequant += dequant_data;
average_raw += raw_data;
}
}
if (quant_type == QuantType_WeightQuant && !k_means) {
// mean
average_dequant = average_dequant / one_filter_size;
average_raw = average_raw / one_filter_size;
// std
double variance_dequant = 0;
double variance_raw = 0;
for (uint32_t j = 0; j < one_filter_size; j++) {
auto index = j + i * one_filter_size;
if (!channel_at_first) {
index = j * channels + i;
}
MS_ASSERT(index < elem_count);
variance_dequant += std::pow((*dequant_datas)[index] - average_dequant, 2);
variance_raw += std::pow(raw_datas[index] - average_raw, 2);
}
variance_dequant = std::sqrt(variance_dequant / one_filter_size);
variance_raw = std::sqrt(variance_raw / one_filter_size);
quant_param.varCorr = 1;
if (variance_raw != 0 && variance_dequant != 0) {
auto temp_var_corr = variance_raw / variance_dequant;
if (temp_var_corr > 0 && temp_var_corr < 10) {
quant_param.varCorr = temp_var_corr;
} else {
MS_LOG(WARNING) << "unexpected var_corr: " << temp_var_corr;
}
}
quant_param.meanCorr = average_raw - average_dequant * quant_param.varCorr;
}
quant_params->emplace_back(quant_param);
}
auto status = UpdateTensorDataAndSize(weight, quant_datas->data(), quant_datas->size() * sizeof(T), quant_data_type);
if (status != RET_OK) {
MS_LOG(ERROR) << "UpdateTensorDataAndSize error";
return RET_ERROR;
}
return RET_OK;
}
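
The varCorr / meanCorr statistics computed above rescale and shift the dequantized weights toward the raw distribution. A standalone restatement of that arithmetic, as a hypothetical helper rather than the converter's interface:

#include <cmath>
#include <cstddef>
#include <vector>

void CalcCorrection(const std::vector<float> &raw, const std::vector<float> &dequant,
                    float *var_corr, float *mean_corr) {
  const std::size_t n = raw.size();
  *var_corr = 1.0f;
  *mean_corr = 0.0f;
  if (n == 0 || dequant.size() != n) {
    return;
  }
  double mean_raw = 0.0, mean_deq = 0.0;
  for (std::size_t i = 0; i < n; ++i) {
    mean_raw += raw[i];
    mean_deq += dequant[i];
  }
  mean_raw /= n;
  mean_deq /= n;
  double std_raw = 0.0, std_deq = 0.0;
  for (std::size_t i = 0; i < n; ++i) {
    std_raw += std::pow(raw[i] - mean_raw, 2);
    std_deq += std::pow(dequant[i] - mean_deq, 2);
  }
  std_raw = std::sqrt(std_raw / n);
  std_deq = std::sqrt(std_deq / n);
  if (std_raw != 0.0 && std_deq != 0.0) {
    const double corr = std_raw / std_deq;
    if (corr > 0.0 && corr < 10.0) {
      *var_corr = static_cast<float>(corr);  // keep the same sanity bound as above
    }
  }
  *mean_corr = static_cast<float>(mean_raw - mean_deq * (*var_corr));
}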
template <typename T>
STATUS DoPerLayerQuant(const tensor::TensorPtr &weight, const QuantType &quant_type,
std::vector<schema::QuantParamT> *quant_params, const int &quant_max, const int &quant_min,
const size_t &bit_num, const bool &k_means, std::vector<T> *quant_datas,
TypeId quant_data_type) {
auto dims = weight->shape();
size_t elem_count = weight->DataSize();
auto *raw_datas = static_cast<float *>(weight->data_c());
float min = FLT_MAX;
float max = -FLT_MAX;
for (uint32_t i = 0; i < elem_count; i++) {
// find max min
min = std::min(min, raw_datas[i]);
max = std::max(max, raw_datas[i]);
}
schema::QuantParamT quant_param;
if (!k_means) {
STATUS status = CalQuantizationParams(&quant_param, min, max, false, quant_max, quant_min, bit_num);
if (status != RET_OK) {
MS_LOG(ERROR) << "CalQuantizationParams failed" << status;
return status;
}
}
quant_params->emplace_back(quant_param);
// update data and datatype
for (uint32_t i = 0; i < elem_count; i++) {
float raw_data = raw_datas[i];
if (!k_means) {
auto quant_data = QuantizeData<T>(raw_data, quant_param, quant_max, quant_min);
(*quant_datas)[i] = quant_data;
}
}
auto status = UpdateTensorDataAndSize(weight, quant_datas->data(), quant_datas->size() * sizeof(T), quant_data_type);
if (status != RET_OK) {
MS_LOG(ERROR) << "UpdateTensorDataAndSize error";
return RET_ERROR;
}
return RET_OK;
}
template <typename T>
STATUS DoBitPack(const tensor::TensorPtr &weight, const size_t &bit_num, const std::vector<T> &quant_datas) {
if (bit_num != 8 && bit_num != 16) {
@ -363,15 +175,19 @@ STATUS QuantFilter(const tensor::TensorPtr &weight, const PrimitivePtr &primitiv
}
std::vector<T> quant_data(elem_count);
std::vector<float> dequant_datas(elem_count);
int ret = RET_OK;
if (per_channel) {
bool channel_at_first = true;
int channel_cnt = -1;
CalQuantAssitInfo(primitive, dims, index, &channel_at_first, &channel_cnt);
// channel at first
auto channels = CalChannels(dims, channel_cnt, &channel_at_first);
if (channels == 0) {
MS_LOG(ERROR) << "channels is zero";
return RET_ERROR;
}
ret = DoPerChannelQuant<T>(static_cast<float *>(weight->data_c()), weight->DataSize(),
static_cast<mindspore::schema::QuantType>(quant_type), &quant_params, quant_max,
quant_min, bit_num, k_means, &quant_data, channels, channel_at_first);
if (ret == RET_CONTINUE) {
return ret;
} else if (ret != RET_OK) {
@ -379,13 +195,18 @@ STATUS QuantFilter(const tensor::TensorPtr &weight, const PrimitivePtr &primitiv
return ret;
}
} else {
ret = DoPerLayerQuant<T>(static_cast<float *>(weight->data_c()), weight->DataSize(), &quant_params, quant_max,
quant_min, bit_num, k_means, &quant_data);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Do per layer quant failed.";
return ret;
}
}
auto status = UpdateTensorDataAndSize(weight, quant_data.data(), quant_data.size() * sizeof(T), quant_data_type);
if (status != RET_OK) {
MS_LOG(ERROR) << "UpdateTensorDataAndSize error";
return RET_ERROR;
}
#ifdef HUFFMAN_ENCODE
auto huffman_encode = std::make_unique<lite::HuffmanEncode>();