!16410 arm32 fp16 support npu and weight quant

From: @lzkcode
Reviewed-by: @zhang_xue_tong,@zhanghaibo5
Signed-off-by: @zhang_xue_tong
This commit is contained in:
mindspore-ci-bot 2021-05-18 10:43:47 +08:00 committed by Gitee
commit 66ea5f4ea1
12 changed files with 249 additions and 42 deletions

View File

@ -577,9 +577,13 @@ build_lite()
find . -maxdepth 1 | grep -v java | grep '/' | xargs -I {} rm -rf {}
fi
if [[ "${LITE_LANGUAGE}" == "cpp" ]]; then
if [[ "${DEVICE}" == "" && "${LOCAL_LITE_PLATFORM}" == "arm64" ]]; then
LOCAL_LITE_ENABLE_GPU="opencl"
LOCAL_LITE_ENABLE_NPU="on"
if [[ "${DEVICE}" == "" ]]; then
if [[ "${LOCAL_LITE_PLATFORM}" == "arm64" || "${LOCAL_LITE_PLATFORM}" == "arm32" ]]; then
LOCAL_LITE_ENABLE_NPU="on"
fi
if [[ "${LOCAL_LITE_PLATFORM}" == "arm64" ]]; then
LOCAL_LITE_ENABLE_GPU="opencl"
fi
fi
if [[ "${LOCAL_INC_BUILD}" == "off" ]]; then
@ -589,12 +593,7 @@ build_lite()
fi
if [ "${LOCAL_LITE_ENABLE_NPU}" == "on" ]; then
if [ "${LOCAL_LITE_PLATFORM}" == "arm64" ]; then
checkddk
else
echo "NPU only support platform arm64."
exit 1
fi
checkddk
fi
cd ${BASEPATH}/mindspore/lite/build

View File

@ -192,6 +192,14 @@ if(PLATFORM_ARM64)
install(TARGETS ${BENCHMARK_NAME} RUNTIME DESTINATION ${BENCHMARK_ROOT_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
endif()
elseif(PLATFORM_ARM32)
if(SUPPORT_NPU)
install(FILES ${DDK_LIB_PATH}/libhiai.so DESTINATION ${RUNTIME_DIR}/third_party/hiai_ddk/lib
COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${DDK_LIB_PATH}/libhiai_ir.so DESTINATION ${RUNTIME_DIR}/third_party/hiai_ddk/lib
COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${DDK_LIB_PATH}/libhiai_ir_build.so DESTINATION ${RUNTIME_DIR}/third_party/hiai_ddk/lib
COMPONENT ${RUNTIME_COMPONENT_NAME})
endif()
if(SUPPORT_TRAIN)
install(DIRECTORY ${TOP_DIR}/mindspore/lite/include/ DESTINATION ${RUNTIME_INC_DIR}
COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h")

View File

@ -73,6 +73,7 @@ set(LITE_SRC
${CMAKE_CURRENT_SOURCE_DIR}/errorcode.cc
${CMAKE_CURRENT_SOURCE_DIR}/weight_decoder.cc
${CMAKE_CURRENT_SOURCE_DIR}/huffman_decode.cc
${CMAKE_CURRENT_SOURCE_DIR}/cpu_info.cc
)
if(SUPPORT_GPU STREQUAL opencl)

View File

@ -0,0 +1,138 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef ENABLE_ARM
#include "src/cpu_info.h"
#include <sys/auxv.h>
#include <asm/hwcap.h>
#include <fstream>
#include "src/common/log_adapter.h"
#include "nnacl/nnacl_utils.h"
namespace mindspore::lite {
// Return midr_ with its CPU-part bit-field replaced by `part`.
// Only the field is updated; all other MIDR bits are kept as-is.
uint32_t CpuInfo::MidrSetPart(uint32_t part) {
  uint32_t part_field = (part << CPUINFO_ARM_MIDR_PART_OFFSET) & CPUINFO_ARM_MIDR_PART_MASK;
  uint32_t other_bits = midr_ & ~CPUINFO_ARM_MIDR_PART_MASK;
  return other_bits | part_field;
}
// Return midr_ with its implementer bit-field replaced by `implementer`.
// Only the field is updated; all other MIDR bits are kept as-is.
uint32_t CpuInfo::MidrSetImplementer(uint32_t implementer) {
  uint32_t impl_field =
    (implementer << CPUINFO_ARM_MIDR_IMPLEMENTER_OFFSET) & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK;
  uint32_t other_bits = midr_ & ~CPUINFO_ARM_MIDR_IMPLEMENTER_MASK;
  return other_bits | impl_field;
}
// Parse a hexadecimal literal of the form "0x<hex digits>" into a uint32_t.
// Returns 0 (the caller's sentinel for "invalid") when the string is too
// short, lacks the "0x" prefix, or contains a non-hex character.
uint32_t CpuInfo::StringToDigit(const std::string &str) {
  // Require at least "0x" plus one digit; this also guarantees the prefix
  // check below never indexes past the end of a short string (the original
  // read str[0]/str[1] unconditionally).
  if (str.length() < 3) {
    return 0;
  }
  if (str[0] != '0' || str[1] != 'x') {
    return 0;
  }
  uint32_t value = 0;
  for (size_t i = 2; i < str.length(); ++i) {
    char c = str[i];
    uint32_t digit;
    if (c >= '0' && c <= '9') {
      digit = static_cast<uint32_t>(c - '0');
    } else if (c >= 'A' && c <= 'F') {
      digit = 10 + static_cast<uint32_t>(c - 'A');
    } else if (c >= 'a' && c <= 'f') {
      digit = 10 + static_cast<uint32_t>(c - 'a');
    } else {
      return 0;
    }
    value = value * 16 + digit;
  }
  return value;
}
// Parse a "CPU part" field from /proc/cpuinfo.
// Valid values are "0x" followed by 1-3 hex digits, i.e. a total
// length of 3 to 5 characters; anything else yields 0.
uint32_t CpuInfo::ParseArmCpuPart(const std::string &cpu_part) {
  const size_t len = cpu_part.length();
  const bool length_ok = (len >= 3) && (len <= 5);
  return length_ok ? StringToDigit(cpu_part) : 0;
}
// Parse a "CPU implementer" field from /proc/cpuinfo.
// Valid values are "0x" plus 1 or 2 hex digits (length 3 or 4);
// anything else yields 0.
uint32_t CpuInfo::ParseArmCpuImplementer(const std::string &str) {
  const size_t len = str.length();
  if (len != 3 && len != 4) {
    return 0;
  }
  return StringToDigit(str);
}
/* Only get hardware and midr now */
// Read /proc/cpuinfo and fill the implementer, part and hardware fields of
// android_cpu_info. A field that is already set (non-zero / non-empty) is
// kept, so only the first occurrence of each key wins.
void CpuInfo::GetArmProcCpuInfo(AndroidCpuInfo *android_cpu_info) {
  if (android_cpu_info == nullptr) {
    return;
  }
  std::ifstream infile("/proc/cpuinfo", std::ios::in);
  if (!infile.is_open()) {
    return;
  }
  std::string line;
  while (getline(infile, line)) {
    // Split at the FIRST colon only; later colons can only belong to the
    // value and never produce one of the three keys we match below.
    auto colon = line.find(':');
    if (colon == std::string::npos) {
      continue;
    }
    std::string key = line.substr(0, colon);
    key.erase(0, key.find_first_not_of(' '));
    key.erase(key.find_last_not_of('\t') + 1);
    // /proc/cpuinfo separates key and value with ": ". Guard the +2 offset:
    // the original substr(i + 2) threw std::out_of_range (uncaught) for a
    // line that ends right after the colon.
    std::string value = (colon + 2 <= line.length()) ? line.substr(colon + 2) : std::string();
    if (key == "CPU implementer" && android_cpu_info->cpu_implementer == 0) {
      android_cpu_info->cpu_implementer = ParseArmCpuImplementer(value);
    } else if (key == "CPU part" && android_cpu_info->cpu_part == 0) {
      android_cpu_info->cpu_part = ParseArmCpuPart(value);
    } else if (key == "Hardware" && android_cpu_info->hardware.empty()) {
      android_cpu_info->hardware = value;
    }
  }
  infile.close();
}
// Decide whether this CPU supports ARMv8.2 half-precision (FP16) arithmetic.
// arm32 build: match the MIDR (implementer + part) built from /proc/cpuinfo
// against a fixed allow-list of cores. arm64 build: query the kernel hwcap
// bits. Any other build leaves fp16_flag_ at its default (false).
bool CpuInfo::ArmIsSupportFp16() {
#ifdef ENABLE_ARM32
// Fill android_cpu_info_ from /proc/cpuinfo, then compose midr_:
// set the part field first, then the implementer field on top of the
// updated midr_ (each setter returns a new value; the assignment chains).
GetArmProcCpuInfo(&android_cpu_info_);
midr_ = MidrSetPart(android_cpu_info_.cpu_part);
midr_ = MidrSetImplementer(android_cpu_info_.cpu_implementer);
// Compare only the implementer and part fields; every listed core has the
// FP16 extension. No default case: unknown cores keep fp16_flag_ false.
switch (midr_ & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) {
case UINT32_C(0x4100D050): /* Cortex-A55 */
case UINT32_C(0x4100D060): /* Cortex-A65 */
case UINT32_C(0x4100D0B0): /* Cortex-A76 */
case UINT32_C(0x4100D0C0): /* Neoverse N1 */
case UINT32_C(0x4100D0D0): /* Cortex-A77 */
case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
case UINT32_C(0x51008020): /* Kryo 385 Gold (Cortex-A75) */
case UINT32_C(0x51008030): /* Kryo 385 Silver (Cortex-A55) */
case UINT32_C(0x51008040): /* Kryo 485 Gold (Cortex-A76) */
case UINT32_C(0x51008050): /* Kryo 485 Silver (Cortex-A55) */
case UINT32_C(0x53000030): /* Exynos M4 */
case UINT32_C(0x53000040): /* Exynos M5 */
fp16_flag_ = true;
}
#elif defined(ENABLE_ARM64)
// 16 is AT_HWCAP on Linux; getHwCap is a project helper (nnacl_utils.h),
// presumably wrapping getauxval — TODO confirm.
int hwcap_type = 16;
uint32_t hwcap = getHwCap(hwcap_type);
// HWCAP_FPHP: half-precision floating point supported in hardware.
if (hwcap & HWCAP_FPHP) {
// NOTE(review): hwcap is streamed in decimal even though the message says
// "0x" — confirm whether std::hex was intended here.
MS_LOG(DEBUG) << "Hw cap support FP16, hwcap: 0x" << hwcap;
fp16_flag_ = true;
} else {
MS_LOG(DEBUG) << "Hw cap NOT support FP16, hwcap: 0x" << hwcap;
}
#endif
return fp16_flag_;
}
} // namespace mindspore::lite
#endif

View File

@ -0,0 +1,52 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef ENABLE_ARM
#ifndef MINDSPORE_LITE_SRC_CPU_INFO_H
#define MINDSPORE_LITE_SRC_CPU_INFO_H
// Includes moved inside the guard; <cstdint> added because uint32_t and
// UINT32_C were used without it (only <string> was included before).
#include <cstdint>
#include <string>
namespace mindspore::lite {
#define CPUINFO_HARDWARE_VALUE_MAX 64
/* As per include/sys/system_properties.h in Android NDK */
#define CPUINFO_ARM_MIDR_IMPLEMENTER_MASK UINT32_C(0xFF000000)
#define CPUINFO_ARM_MIDR_PART_MASK UINT32_C(0x0000FFF0)
#define CPUINFO_ARM_MIDR_IMPLEMENTER_OFFSET 24
#define CPUINFO_ARM_MIDR_PART_OFFSET 4
// Raw "CPU implementer" / "CPU part" / "Hardware" values read from
// /proc/cpuinfo (zero / empty means "not found yet").
typedef struct AndroidCpuInfo {
  uint32_t cpu_implementer = 0;
  uint32_t cpu_part = 0;
  std::string hardware = "";
} AndroidCpuInfo;
// Identifies the CPU from /proc/cpuinfo (arm32) or kernel hwcaps (arm64)
// to decide whether FP16 arithmetic is available. See cpu_info.cc.
class CpuInfo {
 public:
  CpuInfo() = default;
  virtual ~CpuInfo() = default;
  // Fill android_cpu_info's implementer/part/hardware fields from /proc/cpuinfo.
  void GetArmProcCpuInfo(AndroidCpuInfo *android_cpu_info);
  // Parse a "CPU implementer" field ("0x" + 1-2 hex digits); 0 on failure.
  uint32_t ParseArmCpuImplementer(const std::string &suffix);
  // Parse a "CPU part" field ("0x" + 1-3 hex digits); 0 on failure.
  uint32_t ParseArmCpuPart(const std::string &suffix);
  // Return midr_ with the part field replaced (does not store the result).
  uint32_t MidrSetPart(uint32_t part);
  // Return midr_ with the implementer field replaced (does not store the result).
  uint32_t MidrSetImplementer(uint32_t implementer);
  // True when the detected CPU supports ARMv8.2 FP16 arithmetic.
  bool ArmIsSupportFp16();
  // Parse a "0x..." hex string to an integer; 0 on any malformed input.
  uint32_t StringToDigit(const std::string &str);

 private:
  bool fp16_flag_ = false;   // cached FP16-support verdict
  uint32_t midr_ = 0;        // composed Main ID Register value (implementer|part)
  AndroidCpuInfo android_cpu_info_;
};
}  // namespace mindspore::lite
#endif  // MINDSPORE_LITE_SRC_CPU_INFO_H
#endif

View File

@ -13,11 +13,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef __ANDROID__
#include <sys/auxv.h>
#include <asm/hwcap.h>
#endif
#include "src/inner_context.h"
#include "include/errorcode.h"
#include "src/common/log_adapter.h"
@ -38,6 +33,10 @@ InnerContext::InnerContext(const Context *context) {
for (auto &device_ctx : context->device_list_) {
this->device_list_.push_back(device_ctx);
}
#ifdef ENABLE_ARM
cpu_info_ = new CpuInfo;
fp16_flag_ = cpu_info_->ArmIsSupportFp16();
#endif
}
#if SUPPORT_NPU
@ -60,6 +59,10 @@ InnerContext::InnerContext(const Context *context, NPUManager *npu_manager) {
}
}
this->npu_manager_ = npu_manager;
#ifdef ENABLE_ARM
cpu_info_ = new CpuInfo;
fp16_flag_ = cpu_info_->ArmIsSupportFp16();
#endif
}
#endif
@ -111,6 +114,9 @@ InnerContext::~InnerContext() {
free(this->thread_pool_);
this->thread_pool_ = nullptr;
}
#ifdef ENABLE_ARM
delete cpu_info_;
#endif
}
int InnerContext::IsValid() const {
@ -250,23 +256,5 @@ NpuDeviceInfo InnerContext::GetNpuInfo() const {
}
// Support CPU backend to judge whether it supports Float16.
bool InnerContext::IsSupportFloat16() const {
bool status = false;
#if defined(ENABLE_ARM64)
#if defined(__ANDROID__)
int hwcap_type = 16;
uint32_t hwcap = getHwCap(hwcap_type);
if (hwcap & HWCAP_FPHP) {
MS_LOG(DEBUG) << "Hw cap support FP16, hwcap: 0x" << hwcap;
status = true;
} else {
MS_LOG(DEBUG) << "Hw cap NOT support FP16, hwcap: 0x" << hwcap;
status = false;
}
#endif
#endif
return status;
}
bool InnerContext::IsSupportFloat16() const { return fp16_flag_; }
} // namespace mindspore::lite

View File

@ -21,6 +21,9 @@
#include "include/context.h"
#include "src/runtime/runtime_api.h"
#include "src/runtime/allocator.h"
#ifdef ENABLE_ARM
#include "src/cpu_info.h"
#endif
#ifdef SUPPORT_NPU
#include "src/runtime/agent/npu/npu_manager.h"
#endif
@ -72,9 +75,13 @@ struct InnerContext : public Context {
bool IsSupportFloat16() const;
#if SUPPORT_NPU
bool fp16_flag_ = false;
private:
#ifdef ENABLE_ARM
CpuInfo *cpu_info_ = nullptr;
#endif
#if SUPPORT_NPU
NPUManager *npu_manager_ = nullptr;
#endif
};

View File

@ -197,7 +197,7 @@ int WeightDecoder::DequantWeight(lite::Tensor *input_tensor, bool channel_first,
input_tensor->set_own_data(true);
input_tensor->set_data_type(dst_data_type);
} else if (input_tensor->data_type() == kNumberTypeInt16 && dst_data_type == kNumberTypeFloat16) {
#if defined(ENABLE_ARM64) && defined(ENABLE_FP16)
#if defined(ENABLE_ARM) && defined(ENABLE_FP16)
auto new_const_data = DequantData<int16_t, float16_t>(input_tensor, channel_first);
input_tensor->set_data(new_const_data);
input_tensor->set_own_data(true);
@ -212,7 +212,7 @@ int WeightDecoder::DequantWeight(lite::Tensor *input_tensor, bool channel_first,
input_tensor->set_own_data(true);
input_tensor->set_data_type(dst_data_type);
} else if (input_tensor->data_type() == kNumberTypeInt8 && dst_data_type == kNumberTypeFloat16) {
#if defined(ENABLE_ARM64) && defined(ENABLE_FP16)
#if defined(ENABLE_ARM) && defined(ENABLE_FP16)
auto new_const_data = DequantData<int8_t, float16_t>(input_tensor, channel_first);
input_tensor->set_data(new_const_data);
input_tensor->set_own_data(true);

View File

@ -171,8 +171,12 @@ class WeightDecoder {
if (!channel_first) {
index = channels * j + i;
}
auto dequant_data = (quant_datas[index] - zero_point) * scale;
dequant_datas[index] = static_cast<DT>(dequant_data * var_corr + mean_corr);
#ifdef ENABLE_ARM32
volatile float dequant_data = (quant_datas[index] - zero_point) * scale * var_corr + mean_corr;
dequant_datas[index] = static_cast<DT>(dequant_data);
#else
dequant_datas[index] = static_cast<DT>((quant_datas[index] - zero_point) * scale * var_corr + mean_corr);
#endif
}
}
} else {
@ -190,7 +194,12 @@ class WeightDecoder {
}
dequant_datas[j] = static_cast<DT>(param.clusters[index - INT8_MIN]);
} else {
#ifdef ENABLE_ARM32
volatile float dequant_data = (quant_datas[j] - zero_point) * scale;
dequant_datas[j] = static_cast<DT>(dequant_data);
#else
dequant_datas[j] = static_cast<DT>((quant_datas[j] - zero_point) * scale);
#endif
}
}
}

View File

@ -160,6 +160,7 @@ set(TEST_LITE_SRC
${LITE_DIR}/src/common/string_util.cc
${LITE_DIR}/src/common/quant_utils.cc
${LITE_DIR}/src/errorcode.cc
${LITE_DIR}/src/cpu_info.cc
)
if(ENABLE_TOOLS)

View File

@ -1899,7 +1899,9 @@ function Run_arm32() {
if [ -f ${arm32_path}/mindspore-lite-${version}-inference-android-aarch32/inference/lib/libminddata-lite.so ]; then
cp -a ${arm32_path}/mindspore-lite-${version}-inference-android-aarch32/inference/lib/libminddata-lite.so ${benchmark_test_path}/libminddata-lite.so || exit 1
fi
cp -a ${arm32_path}/mindspore-lite-${version}-inference-android-aarch32/inference/third_party/hiai_ddk/lib/libhiai.so ${benchmark_test_path}/libhiai.so || exit 1
cp -a ${arm32_path}/mindspore-lite-${version}-inference-android-aarch32/inference/third_party/hiai_ddk/lib/libhiai_ir.so ${benchmark_test_path}/libhiai_ir.so || exit 1
cp -a ${arm32_path}/mindspore-lite-${version}-inference-android-aarch32/inference/third_party/hiai_ddk/lib/libhiai_ir_build.so ${benchmark_test_path}/libhiai_ir_build.so || exit 1
cp -a ${arm32_path}/mindspore-lite-${version}-inference-android-aarch32/inference/lib/libmindspore-lite.so ${benchmark_test_path}/libmindspore-lite.so || exit 1
cp -a ${arm32_path}/mindspore-lite-${version}-inference-android-aarch32/inference/lib/libmslite_kernel_reg.so ${benchmark_test_path}/libmslite_kernel_reg.so || exit 1
cp -a ${arm32_path}/mindspore-lite-${version}-inference-android-aarch32/tools/benchmark/benchmark ${benchmark_test_path}/benchmark || exit 1
@ -2127,7 +2129,9 @@ function Run_armv82_a32_fp16() {
if [ -f ${armv82_path}/mindspore-lite-${version}-inference-android-aarch32/inference/minddata/lib/libminddata-lite.so ]; then
cp -a ${armv82_path}/mindspore-lite-${version}-inference-android-aarch32/inference/minddata/lib/libminddata-lite.so ${benchmark_test_path}/libminddata-lite.so || exit 1
fi
cp -a ${armv82_path}/mindspore-lite-${version}-inference-android-aarch32/inference/third_party/hiai_ddk/lib/libhiai.so ${benchmark_test_path}/libhiai.so || exit 1
cp -a ${armv82_path}/mindspore-lite-${version}-inference-android-aarch32/inference/third_party/hiai_ddk/lib/libhiai_ir.so ${benchmark_test_path}/libhiai_ir.so || exit 1
cp -a ${armv82_path}/mindspore-lite-${version}-inference-android-aarch32/inference/third_party/hiai_ddk/lib/libhiai_ir_build.so ${benchmark_test_path}/libhiai_ir_build.so || exit 1
cp -a ${armv82_path}/mindspore-lite-${version}-inference-android-aarch32/inference/lib/libmindspore-lite.so ${benchmark_test_path}/libmindspore-lite.so || exit 1
cp -a ${armv82_path}/mindspore-lite-${version}-inference-android-aarch32/inference/lib/libmslite_kernel_reg.so ${benchmark_test_path}/libmslite_kernel_reg.so || exit 1
cp -a ${armv82_path}/mindspore-lite-${version}-inference-android-aarch32/tools/benchmark/benchmark ${benchmark_test_path}/benchmark || exit 1

View File

@ -149,7 +149,7 @@ function Run_arm() {
cp -a ${arm_path}/mindspore-lite-${version_arm}-train-android-${process_unit}/train/third_party/libjpeg-turbo/lib/libturbojpeg.so* ${benchmark_train_test_path}/ || exit 1
cp -a ${arm_path}/mindspore-lite-${version_arm}-train-android-${process_unit}/train/lib/libminddata-lite.so ${benchmark_train_test_path}/libminddata-lite.so || exit 1
fi
if [ "$1" == arm64 ]; then
if [ "$1" == arm64 ] || [ "$1" == arm32 ]; then
cp -a ${arm_path}/mindspore-lite-${version_arm}-train-android-${process_unit}/train/third_party/hiai_ddk/lib/libhiai.so ${benchmark_train_test_path}/libhiai.so || exit 1
cp -a ${arm_path}/mindspore-lite-${version_arm}-train-android-${process_unit}/train/third_party/hiai_ddk/lib/libhiai_ir.so ${benchmark_train_test_path}/libhiai_ir.so || exit 1
cp -a ${arm_path}/mindspore-lite-${version_arm}-train-android-${process_unit}/train/third_party/hiai_ddk/lib/libhiai_ir_build.so ${benchmark_train_test_path}/libhiai_ir_build.so || exit 1