diff --git a/build.sh b/build.sh index dfde88ee1d0..25e797fe36b 100755 --- a/build.sh +++ b/build.sh @@ -577,9 +577,13 @@ build_lite() find . -maxdepth 1 | grep -v java | grep '/' | xargs -I {} rm -rf {} fi if [[ "${LITE_LANGUAGE}" == "cpp" ]]; then - if [[ "${DEVICE}" == "" && "${LOCAL_LITE_PLATFORM}" == "arm64" ]]; then - LOCAL_LITE_ENABLE_GPU="opencl" - LOCAL_LITE_ENABLE_NPU="on" + if [[ "${DEVICE}" == "" ]]; then + if [[ "${LOCAL_LITE_PLATFORM}" == "arm64" || "${LOCAL_LITE_PLATFORM}" == "arm32" ]]; then + LOCAL_LITE_ENABLE_NPU="on" + fi + if [[ "${LOCAL_LITE_PLATFORM}" == "arm64" ]]; then + LOCAL_LITE_ENABLE_GPU="opencl" + fi fi if [[ "${LOCAL_INC_BUILD}" == "off" ]]; then @@ -589,12 +593,7 @@ build_lite() fi if [ "${LOCAL_LITE_ENABLE_NPU}" == "on" ]; then - if [ "${LOCAL_LITE_PLATFORM}" == "arm64" ]; then - checkddk - else - echo "NPU only support platform arm64." - exit 1 - fi + checkddk fi cd ${BASEPATH}/mindspore/lite/build diff --git a/cmake/package_lite.cmake b/cmake/package_lite.cmake index 835689b2309..18459339624 100644 --- a/cmake/package_lite.cmake +++ b/cmake/package_lite.cmake @@ -176,6 +176,14 @@ if(PLATFORM_ARM64) install(TARGETS ${BENCHMARK_NAME} RUNTIME DESTINATION ${BENCHMARK_ROOT_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME}) endif() elseif(PLATFORM_ARM32) + if(SUPPORT_NPU) + install(FILES ${DDK_LIB_PATH}/libhiai.so DESTINATION ${RUNTIME_DIR}/third_party/hiai_ddk/lib + COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${DDK_LIB_PATH}/libhiai_ir.so DESTINATION ${RUNTIME_DIR}/third_party/hiai_ddk/lib + COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${DDK_LIB_PATH}/libhiai_ir_build.so DESTINATION ${RUNTIME_DIR}/third_party/hiai_ddk/lib + COMPONENT ${RUNTIME_COMPONENT_NAME}) + endif() if(SUPPORT_TRAIN) install(DIRECTORY ${TOP_DIR}/mindspore/lite/include/ DESTINATION ${RUNTIME_INC_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h") diff --git a/mindspore/lite/src/CMakeLists.txt b/mindspore/lite/src/CMakeLists.txt index 
4a9718d309d..c0f0aab2b61 100644 --- a/mindspore/lite/src/CMakeLists.txt +++ b/mindspore/lite/src/CMakeLists.txt @@ -76,6 +76,7 @@ set(LITE_SRC ${CMAKE_CURRENT_SOURCE_DIR}/errorcode.cc ${CMAKE_CURRENT_SOURCE_DIR}/weight_decoder.cc ${CMAKE_CURRENT_SOURCE_DIR}/huffman_decode.cc + ${CMAKE_CURRENT_SOURCE_DIR}/cpu_info.cc ) if(SUPPORT_GPU STREQUAL opencl) diff --git a/mindspore/lite/src/cpu_info.cc b/mindspore/lite/src/cpu_info.cc new file mode 100644 index 00000000000..7e3ba21b1e0 --- /dev/null +++ b/mindspore/lite/src/cpu_info.cc @@ -0,0 +1,138 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
#ifdef ENABLE_ARM
#include "src/cpu_info.h"
#include <cstdint>
#include <fstream>
#include <string>
#include "src/common/log_adapter.h"
#include "nnacl/nnacl_utils.h"

namespace mindspore::lite {
namespace {
// Characters treated as padding around the key and the value of a "/proc/cpuinfo" line.
constexpr char kCpuInfoBlanks[] = " \t\r";

// Returns |text| without leading/trailing blanks; an all-blank |text| yields "".
std::string TrimBlanks(const std::string &text) {
  const auto first = text.find_first_not_of(kCpuInfoBlanks);
  if (first == std::string::npos) {
    return "";
  }
  const auto last = text.find_last_not_of(kCpuInfoBlanks);
  return text.substr(first, last - first + 1);
}
}  // namespace

// Returns a copy of midr_ whose "part" field is replaced by |part|.
// midr_ itself is NOT modified; the caller assigns the result back.
uint32_t CpuInfo::MidrSetPart(uint32_t part) {
  return (midr_ & ~CPUINFO_ARM_MIDR_PART_MASK) | ((part << CPUINFO_ARM_MIDR_PART_OFFSET) & CPUINFO_ARM_MIDR_PART_MASK);
}

// Returns a copy of midr_ whose "implementer" field is replaced by |implementer|.
// midr_ itself is NOT modified; the caller assigns the result back.
uint32_t CpuInfo::MidrSetImplementer(uint32_t implementer) {
  return (midr_ & ~CPUINFO_ARM_MIDR_IMPLEMENTER_MASK) |
         ((implementer << CPUINFO_ARM_MIDR_IMPLEMENTER_OFFSET) & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK);
}

// Converts a lower-case "0x"-prefixed hexadecimal string (e.g. "0xd05") to its value.
// Returns 0 when the prefix is missing, when the string is shorter than the prefix,
// or when any character after the prefix is not a hex digit. "0x" alone also yields 0.
uint32_t CpuInfo::StringToDigit(const std::string &str) {
  constexpr size_t kPrefixLength = 2;
  constexpr uint32_t kHexBase = 16;
  constexpr uint32_t kLetterBase = 10;
  // The length check must come first: indexing str[1] on an empty string is undefined behaviour.
  if (str.length() < kPrefixLength || str[0] != '0' || str[1] != 'x') {
    return 0;
  }
  uint32_t value = 0;
  for (size_t i = kPrefixLength; i < str.length(); ++i) {
    const char ch = str[i];
    uint32_t digit = 0;
    if (ch >= '0' && ch <= '9') {
      digit = static_cast<uint32_t>(ch - '0');
    } else if (ch >= 'A' && ch <= 'F') {
      digit = kLetterBase + static_cast<uint32_t>(ch - 'A');
    } else if (ch >= 'a' && ch <= 'f') {
      digit = kLetterBase + static_cast<uint32_t>(ch - 'a');
    } else {
      return 0;
    }
    value = value * kHexBase + digit;
  }
  return value;
}

// Parses the value of a "CPU part" line. Only strings of 3 to 5 characters
// ("0x" followed by one to three hex digits) are accepted; anything else yields 0.
uint32_t CpuInfo::ParseArmCpuPart(const std::string &cpu_part) {
  constexpr size_t kMinLength = 3;
  constexpr size_t kMaxLength = 5;
  const auto length = cpu_part.length();
  if (length < kMinLength || length > kMaxLength) {
    return 0;
  }
  return StringToDigit(cpu_part);
}

// Parses the value of a "CPU implementer" line. Only strings of 3 or 4 characters
// ("0x" followed by one or two hex digits) are accepted; anything else yields 0.
uint32_t CpuInfo::ParseArmCpuImplementer(const std::string &str) {
  constexpr size_t kMinLength = 3;
  constexpr size_t kMaxLength = 4;
  const auto length = str.length();
  if (length < kMinLength || length > kMaxLength) {
    return 0;
  }
  return StringToDigit(str);
}

// Reads "/proc/cpuinfo" and fills |android_cpu_info| with the first non-zero
// "CPU implementer", the first non-zero "CPU part" and the first non-empty "Hardware" value.
// Fields that are already set on entry are left untouched. Only these three keys are read.
// If the file cannot be opened, or |android_cpu_info| is null, nothing is written.
void CpuInfo::GetArmProcCpuInfo(AndroidCpuInfo *android_cpu_info) {
  if (android_cpu_info == nullptr) {
    MS_LOG(ERROR) << "android_cpu_info is nullptr.";
    return;
  }
  std::ifstream infile("/proc/cpuinfo", std::ios::in);
  if (!infile.is_open()) {
    MS_LOG(DEBUG) << "Can not open /proc/cpuinfo.";
    return;
  }
  std::string line;
  while (std::getline(infile, line)) {
    // Each entry is "<key><blanks>:<blanks><value>". Split on the FIRST colon only,
    // so that a colon inside the value is not mistaken for another separator.
    const auto colon = line.find(':');
    if (colon == std::string::npos) {
      continue;
    }
    const std::string key = TrimBlanks(line.substr(0, colon));
    // colon + 1 never exceeds line.length(), so substr cannot throw even when the value is empty.
    const std::string value = TrimBlanks(line.substr(colon + 1));
    if (key == "CPU implementer" && android_cpu_info->cpu_implementer == 0) {
      android_cpu_info->cpu_implementer = ParseArmCpuImplementer(value);
    } else if (key == "CPU part" && android_cpu_info->cpu_part == 0) {
      android_cpu_info->cpu_part = ParseArmCpuPart(value);
    } else if (key == "Hardware" && android_cpu_info->hardware.empty()) {
      android_cpu_info->hardware = value;
    }
  }
  // infile is closed by its destructor.
}

// Reports whether the CPU supports FP16, caching a positive answer in fp16_flag_.
//  - ARM32: builds a MIDR value from the implementer/part read from /proc/cpuinfo and
//    compares it against a fixed list of cores.
//  - ARM64: tests the HWCAP_FPHP bit of the value returned by getHwCap().
bool CpuInfo::ArmIsSupportFp16() {
#ifdef ENABLE_ARM32
  GetArmProcCpuInfo(&android_cpu_info_);
  // Each setter returns midr_ with one field replaced, so the two assignments compose.
  midr_ = MidrSetPart(android_cpu_info_.cpu_part);
  midr_ = MidrSetImplementer(android_cpu_info_.cpu_implementer);
  switch (midr_ & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) {
    case UINT32_C(0x4100D050): /* Cortex-A55 */
    case UINT32_C(0x4100D060): /* Cortex-A65 */
    case UINT32_C(0x4100D0B0): /* Cortex-A76 */
    case UINT32_C(0x4100D0C0): /* Neoverse N1 */
    case UINT32_C(0x4100D0D0): /* Cortex-A77 */
    case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
    case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
    case UINT32_C(0x51008020): /* Kryo 385 Gold (Cortex-A75) */
    case UINT32_C(0x51008030): /* Kryo 385 Silver (Cortex-A55) */
    case UINT32_C(0x51008040): /* Kryo 485 Gold (Cortex-A76) */
    case UINT32_C(0x51008050): /* Kryo 485 Silver (Cortex-A55) */
    case UINT32_C(0x53000030): /* Exynos M4 */
    case UINT32_C(0x53000040): /* Exynos M5 */
      fp16_flag_ = true;
      break;
    default:
      // Unknown or unparsed core: leave fp16_flag_ unchanged (false unless set earlier).
      break;
  }
#elif defined(ENABLE_ARM64)
  // NOTE(review): 16 looks like the auxiliary-vector key AT_HWCAP, and getHwCap presumably
  // wraps getauxval - confirm against nnacl/nnacl_utils.h.
  int hwcap_type = 16;
  uint32_t hwcap = getHwCap(hwcap_type);
  if (hwcap & HWCAP_FPHP) {
    MS_LOG(DEBUG) << "Hw cap support FP16, hwcap: 0x" << hwcap;
    fp16_flag_ = true;
  } else {
    MS_LOG(DEBUG) << "Hw cap NOT support FP16, hwcap: 0x" << hwcap;
  }
#endif
  return fp16_flag_;
}
}  // namespace mindspore::lite
#endif  // ENABLE_ARM
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_CPU_INFO_H
#define MINDSPORE_LITE_SRC_CPU_INFO_H
#ifdef ENABLE_ARM
// <cstdint> supplies uint32_t / UINT32_C and <string> supplies std::string, so this
// header compiles on its own regardless of what the including file pulled in earlier.
#include <cstdint>
#include <string>

// Macros are not scoped by namespaces, so they are defined at file level.
#define CPUINFO_HARDWARE_VALUE_MAX 64
// Field layout used to assemble a MIDR value: implementer in bits [31:24], part in bits [15:4].
// NOTE(review): the previous comment attributed these values to Android NDK's
// include/sys/system_properties.h - confirm provenance.
#define CPUINFO_ARM_MIDR_IMPLEMENTER_MASK UINT32_C(0xFF000000)
#define CPUINFO_ARM_MIDR_PART_MASK UINT32_C(0x0000FFF0)
#define CPUINFO_ARM_MIDR_IMPLEMENTER_OFFSET 24
#define CPUINFO_ARM_MIDR_PART_OFFSET 4

namespace mindspore::lite {
// Values extracted from "/proc/cpuinfo". A zero / empty field means "not found yet".
struct AndroidCpuInfo {
  uint32_t cpu_implementer = 0;  // numeric value of the "CPU implementer" entry
  uint32_t cpu_part = 0;         // numeric value of the "CPU part" entry
  std::string hardware = "";     // text of the "Hardware" entry
};

// Queries CPU identification data and derives whether FP16 is supported.
class CpuInfo {
 public:
  CpuInfo() = default;
  virtual ~CpuInfo() = default;

  // Fills the unset fields of |android_cpu_info| from "/proc/cpuinfo".
  void GetArmProcCpuInfo(AndroidCpuInfo *android_cpu_info);
  // Parses a "0x"-prefixed implementer string; returns 0 when it is not acceptable.
  uint32_t ParseArmCpuImplementer(const std::string &suffix);
  // Parses a "0x"-prefixed part string; returns 0 when it is not acceptable.
  uint32_t ParseArmCpuPart(const std::string &suffix);
  // Returns the stored MIDR with its part field replaced by |part|; the stored value is not changed.
  uint32_t MidrSetPart(uint32_t part);
  // Returns the stored MIDR with its implementer field replaced by |implementer|; the stored value is not changed.
  uint32_t MidrSetImplementer(uint32_t implementer);
  // Returns true when the CPU is considered FP16 capable.
  bool ArmIsSupportFp16();
  // Converts a "0x"-prefixed hexadecimal string to a number; returns 0 on malformed input.
  uint32_t StringToDigit(const std::string &str);

 private:
  bool fp16_flag_ = false;
  uint32_t midr_ = 0;
  AndroidCpuInfo android_cpu_info_;
};
}  // namespace mindspore::lite
#endif  // ENABLE_ARM
#endif  // MINDSPORE_LITE_SRC_CPU_INFO_H
+#endif diff --git a/mindspore/lite/src/inner_context.cc b/mindspore/lite/src/inner_context.cc index 4e2c036dde0..248a4370bcf 100644 --- a/mindspore/lite/src/inner_context.cc +++ b/mindspore/lite/src/inner_context.cc @@ -13,11 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - -#ifdef __ANDROID__ -#include -#include -#endif #include "src/inner_context.h" #include "include/errorcode.h" #include "src/common/log_adapter.h" @@ -37,6 +32,10 @@ InnerContext::InnerContext(const Context *context) { for (auto &device_ctx : context->device_list_) { this->device_list_.push_back(device_ctx); } +#ifdef ENABLE_ARM + cpu_info_ = new CpuInfo; + fp16_flag_ = cpu_info_->ArmIsSupportFp16(); +#endif } #if SUPPORT_NPU @@ -59,6 +58,10 @@ InnerContext::InnerContext(const Context *context, NPUManager *npu_manager) { } } this->npu_manager_ = npu_manager; +#ifdef ENABLE_ARM + cpu_info_ = new CpuInfo; + fp16_flag_ = cpu_info_->ArmIsSupportFp16(); +#endif } #endif @@ -97,6 +100,9 @@ InnerContext::~InnerContext() { free(this->thread_pool_); this->thread_pool_ = nullptr; } +#ifdef ENABLE_ARM + delete cpu_info_; +#endif } int InnerContext::IsValid() const { @@ -220,23 +226,5 @@ NpuDeviceInfo InnerContext::GetNpuInfo() const { } // Support CPU backend to judge whether it supports Float16. 
-bool InnerContext::IsSupportFloat16() const { - bool status = false; - -#if defined(ENABLE_ARM64) -#if defined(__ANDROID__) - int hwcap_type = 16; - uint32_t hwcap = getHwCap(hwcap_type); - if (hwcap & HWCAP_FPHP) { - MS_LOG(DEBUG) << "Hw cap support FP16, hwcap: 0x" << hwcap; - status = true; - } else { - MS_LOG(DEBUG) << "Hw cap NOT support FP16, hwcap: 0x" << hwcap; - status = false; - } -#endif -#endif - return status; -} - +bool InnerContext::IsSupportFloat16() const { return fp16_flag_; } } // namespace mindspore::lite diff --git a/mindspore/lite/src/inner_context.h b/mindspore/lite/src/inner_context.h index 8a84ae6fccb..2842c101891 100644 --- a/mindspore/lite/src/inner_context.h +++ b/mindspore/lite/src/inner_context.h @@ -20,6 +20,9 @@ #include "include/context.h" #include "src/runtime/runtime_api.h" #include "src/runtime/allocator.h" +#ifdef ENABLE_ARM +#include "src/cpu_info.h" +#endif #ifdef SUPPORT_NPU #include "src/runtime/agent/npu/npu_manager.h" #endif @@ -67,9 +70,13 @@ struct InnerContext : public Context { bool IsSupportFloat16() const; -#if SUPPORT_NPU + bool fp16_flag_ = false; - private: +#ifdef ENABLE_ARM + CpuInfo *cpu_info_ = nullptr; +#endif + +#if SUPPORT_NPU NPUManager *npu_manager_ = nullptr; #endif }; diff --git a/mindspore/lite/src/weight_decoder.cc b/mindspore/lite/src/weight_decoder.cc index b90e8e78d10..8d8294fd043 100644 --- a/mindspore/lite/src/weight_decoder.cc +++ b/mindspore/lite/src/weight_decoder.cc @@ -197,7 +197,7 @@ int WeightDecoder::DequantWeight(lite::Tensor *input_tensor, bool channel_first, input_tensor->set_own_data(true); input_tensor->set_data_type(dst_data_type); } else if (input_tensor->data_type() == kNumberTypeInt16 && dst_data_type == kNumberTypeFloat16) { -#if defined(ENABLE_ARM64) && defined(ENABLE_FP16) +#if defined(ENABLE_ARM) && defined(ENABLE_FP16) auto new_const_data = DequantData(input_tensor, channel_first); input_tensor->set_data(new_const_data); input_tensor->set_own_data(true); @@ -212,7 +212,7 @@ 
int WeightDecoder::DequantWeight(lite::Tensor *input_tensor, bool channel_first, input_tensor->set_own_data(true); input_tensor->set_data_type(dst_data_type); } else if (input_tensor->data_type() == kNumberTypeInt8 && dst_data_type == kNumberTypeFloat16) { -#if defined(ENABLE_ARM64) && defined(ENABLE_FP16) +#if defined(ENABLE_ARM) && defined(ENABLE_FP16) auto new_const_data = DequantData(input_tensor, channel_first); input_tensor->set_data(new_const_data); input_tensor->set_own_data(true); diff --git a/mindspore/lite/src/weight_decoder.h b/mindspore/lite/src/weight_decoder.h index 6a564a0cb8a..b3ecfd44c1b 100644 --- a/mindspore/lite/src/weight_decoder.h +++ b/mindspore/lite/src/weight_decoder.h @@ -171,8 +171,12 @@ class WeightDecoder { if (!channel_first) { index = channels * j + i; } - auto dequant_data = (quant_datas[index] - zero_point) * scale; - dequant_datas[index] = static_cast
(dequant_data * var_corr + mean_corr); +#ifdef ENABLE_ARM32 + volatile float dequant_data = (quant_datas[index] - zero_point) * scale * var_corr + mean_corr; + dequant_datas[index] = static_cast
(dequant_data); +#else + dequant_datas[index] = static_cast
((quant_datas[index] - zero_point) * scale * var_corr + mean_corr); +#endif } } } else { @@ -190,7 +194,12 @@ class WeightDecoder { } dequant_datas[j] = static_cast
(param.clusters[index - INT8_MIN]); } else { +#ifdef ENABLE_ARM32 + volatile float dequant_data = (quant_datas[j] - zero_point) * scale; + dequant_datas[j] = static_cast
(dequant_data); +#else dequant_datas[j] = static_cast
((quant_datas[j] - zero_point) * scale); +#endif } } } diff --git a/mindspore/lite/test/CMakeLists.txt b/mindspore/lite/test/CMakeLists.txt index f07260965b6..462f9b5d9ad 100644 --- a/mindspore/lite/test/CMakeLists.txt +++ b/mindspore/lite/test/CMakeLists.txt @@ -167,6 +167,7 @@ set(TEST_LITE_SRC ${LITE_DIR}/tools/benchmark/benchmark.cc ${LITE_DIR}/test/st/benchmark_test.cc ${LITE_DIR}/src/errorcode.cc + ${LITE_DIR}/src/cpu_info.cc ) ### gpu runtime if(SUPPORT_GPU STREQUAL opencl) diff --git a/mindspore/lite/test/run_benchmark_nets.sh b/mindspore/lite/test/run_benchmark_nets.sh index fb10be6e6fe..382cbc7eaa8 100644 --- a/mindspore/lite/test/run_benchmark_nets.sh +++ b/mindspore/lite/test/run_benchmark_nets.sh @@ -1898,7 +1898,9 @@ function Run_arm32() { if [ -f ${arm32_path}/mindspore-lite-${version}-inference-android-aarch32/inference/lib/libminddata-lite.so ]; then cp -a ${arm32_path}/mindspore-lite-${version}-inference-android-aarch32/inference/lib/libminddata-lite.so ${benchmark_test_path}/libminddata-lite.so || exit 1 fi - + cp -a ${arm32_path}/mindspore-lite-${version}-inference-android-aarch32/inference/third_party/hiai_ddk/lib/libhiai.so ${benchmark_test_path}/libhiai.so || exit 1 + cp -a ${arm32_path}/mindspore-lite-${version}-inference-android-aarch32/inference/third_party/hiai_ddk/lib/libhiai_ir.so ${benchmark_test_path}/libhiai_ir.so || exit 1 + cp -a ${arm32_path}/mindspore-lite-${version}-inference-android-aarch32/inference/third_party/hiai_ddk/lib/libhiai_ir_build.so ${benchmark_test_path}/libhiai_ir_build.so || exit 1 cp -a ${arm32_path}/mindspore-lite-${version}-inference-android-aarch32/inference/lib/libmindspore-lite.so ${benchmark_test_path}/libmindspore-lite.so || exit 1 cp -a ${arm32_path}/mindspore-lite-${version}-inference-android-aarch32/tools/benchmark/benchmark ${benchmark_test_path}/benchmark || exit 1 @@ -2124,7 +2126,9 @@ function Run_armv82_a32_fp16() { if [ -f 
${armv82_path}/mindspore-lite-${version}-inference-android-aarch32/inference/minddata/lib/libminddata-lite.so ]; then cp -a ${armv82_path}/mindspore-lite-${version}-inference-android-aarch32/inference/minddata/lib/libminddata-lite.so ${benchmark_test_path}/libminddata-lite.so || exit 1 fi - + cp -a ${armv82_path}/mindspore-lite-${version}-inference-android-aarch32/inference/third_party/hiai_ddk/lib/libhiai.so ${benchmark_test_path}/libhiai.so || exit 1 + cp -a ${armv82_path}/mindspore-lite-${version}-inference-android-aarch32/inference/third_party/hiai_ddk/lib/libhiai_ir.so ${benchmark_test_path}/libhiai_ir.so || exit 1 + cp -a ${armv82_path}/mindspore-lite-${version}-inference-android-aarch32/inference/third_party/hiai_ddk/lib/libhiai_ir_build.so ${benchmark_test_path}/libhiai_ir_build.so || exit 1 cp -a ${armv82_path}/mindspore-lite-${version}-inference-android-aarch32/inference/lib/libmindspore-lite.so ${benchmark_test_path}/libmindspore-lite.so || exit 1 cp -a ${armv82_path}/mindspore-lite-${version}-inference-android-aarch32/tools/benchmark/benchmark ${benchmark_test_path}/benchmark || exit 1 diff --git a/mindspore/lite/test/run_net_train.sh b/mindspore/lite/test/run_net_train.sh index 6dc0282caa9..5cde22a6358 100755 --- a/mindspore/lite/test/run_net_train.sh +++ b/mindspore/lite/test/run_net_train.sh @@ -149,7 +149,7 @@ function Run_arm() { cp -a ${arm_path}/mindspore-lite-${version_arm}-train-android-${process_unit}/train/third_party/libjpeg-turbo/lib/libturbojpeg.so* ${benchmark_train_test_path}/ || exit 1 cp -a ${arm_path}/mindspore-lite-${version_arm}-train-android-${process_unit}/train/lib/libminddata-lite.so ${benchmark_train_test_path}/libminddata-lite.so || exit 1 fi - if [ "$1" == arm64 ]; then + if [ "$1" == arm64 ] || [ "$1" == arm32 ]; then cp -a ${arm_path}/mindspore-lite-${version_arm}-train-android-${process_unit}/train/third_party/hiai_ddk/lib/libhiai.so ${benchmark_train_test_path}/libhiai.so || exit 1 cp -a 
${arm_path}/mindspore-lite-${version_arm}-train-android-${process_unit}/train/third_party/hiai_ddk/lib/libhiai_ir.so ${benchmark_train_test_path}/libhiai_ir.so || exit 1 cp -a ${arm_path}/mindspore-lite-${version_arm}-train-android-${process_unit}/train/third_party/hiai_ddk/lib/libhiai_ir_build.so ${benchmark_train_test_path}/libhiai_ir_build.so || exit 1