!16410 arm32 fp16: support NPU and weight quantization
From: @lzkcode Reviewed-by: @zhang_xue_tong,@zhanghaibo5 Signed-off-by: @zhang_xue_tong
This commit is contained in:
commit
66ea5f4ea1
13
build.sh
13
build.sh
|
@ -577,10 +577,14 @@ build_lite()
|
||||||
find . -maxdepth 1 | grep -v java | grep '/' | xargs -I {} rm -rf {}
|
find . -maxdepth 1 | grep -v java | grep '/' | xargs -I {} rm -rf {}
|
||||||
fi
|
fi
|
||||||
if [[ "${LITE_LANGUAGE}" == "cpp" ]]; then
|
if [[ "${LITE_LANGUAGE}" == "cpp" ]]; then
|
||||||
if [[ "${DEVICE}" == "" && "${LOCAL_LITE_PLATFORM}" == "arm64" ]]; then
|
if [[ "${DEVICE}" == "" ]]; then
|
||||||
LOCAL_LITE_ENABLE_GPU="opencl"
|
if [[ "${LOCAL_LITE_PLATFORM}" == "arm64" || "${LOCAL_LITE_PLATFORM}" == "arm32" ]]; then
|
||||||
LOCAL_LITE_ENABLE_NPU="on"
|
LOCAL_LITE_ENABLE_NPU="on"
|
||||||
fi
|
fi
|
||||||
|
if [[ "${LOCAL_LITE_PLATFORM}" == "arm64" ]]; then
|
||||||
|
LOCAL_LITE_ENABLE_GPU="opencl"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
if [[ "${LOCAL_INC_BUILD}" == "off" ]]; then
|
if [[ "${LOCAL_INC_BUILD}" == "off" ]]; then
|
||||||
rm -rf ${BASEPATH}/mindspore/lite/build
|
rm -rf ${BASEPATH}/mindspore/lite/build
|
||||||
|
@ -589,12 +593,7 @@ build_lite()
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ "${LOCAL_LITE_ENABLE_NPU}" == "on" ]; then
|
if [ "${LOCAL_LITE_ENABLE_NPU}" == "on" ]; then
|
||||||
if [ "${LOCAL_LITE_PLATFORM}" == "arm64" ]; then
|
|
||||||
checkddk
|
checkddk
|
||||||
else
|
|
||||||
echo "NPU only support platform arm64."
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
cd ${BASEPATH}/mindspore/lite/build
|
cd ${BASEPATH}/mindspore/lite/build
|
||||||
|
|
|
@ -192,6 +192,14 @@ if(PLATFORM_ARM64)
|
||||||
install(TARGETS ${BENCHMARK_NAME} RUNTIME DESTINATION ${BENCHMARK_ROOT_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
|
install(TARGETS ${BENCHMARK_NAME} RUNTIME DESTINATION ${BENCHMARK_ROOT_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
|
||||||
endif()
|
endif()
|
||||||
elseif(PLATFORM_ARM32)
|
elseif(PLATFORM_ARM32)
|
||||||
|
if(SUPPORT_NPU)
|
||||||
|
install(FILES ${DDK_LIB_PATH}/libhiai.so DESTINATION ${RUNTIME_DIR}/third_party/hiai_ddk/lib
|
||||||
|
COMPONENT ${RUNTIME_COMPONENT_NAME})
|
||||||
|
install(FILES ${DDK_LIB_PATH}/libhiai_ir.so DESTINATION ${RUNTIME_DIR}/third_party/hiai_ddk/lib
|
||||||
|
COMPONENT ${RUNTIME_COMPONENT_NAME})
|
||||||
|
install(FILES ${DDK_LIB_PATH}/libhiai_ir_build.so DESTINATION ${RUNTIME_DIR}/third_party/hiai_ddk/lib
|
||||||
|
COMPONENT ${RUNTIME_COMPONENT_NAME})
|
||||||
|
endif()
|
||||||
if(SUPPORT_TRAIN)
|
if(SUPPORT_TRAIN)
|
||||||
install(DIRECTORY ${TOP_DIR}/mindspore/lite/include/ DESTINATION ${RUNTIME_INC_DIR}
|
install(DIRECTORY ${TOP_DIR}/mindspore/lite/include/ DESTINATION ${RUNTIME_INC_DIR}
|
||||||
COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h")
|
COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h")
|
||||||
|
|
|
@ -73,6 +73,7 @@ set(LITE_SRC
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/errorcode.cc
|
${CMAKE_CURRENT_SOURCE_DIR}/errorcode.cc
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/weight_decoder.cc
|
${CMAKE_CURRENT_SOURCE_DIR}/weight_decoder.cc
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/huffman_decode.cc
|
${CMAKE_CURRENT_SOURCE_DIR}/huffman_decode.cc
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/cpu_info.cc
|
||||||
)
|
)
|
||||||
|
|
||||||
if(SUPPORT_GPU STREQUAL opencl)
|
if(SUPPORT_GPU STREQUAL opencl)
|
||||||
|
|
|
@ -0,0 +1,138 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
#ifdef ENABLE_ARM
|
||||||
|
#include "src/cpu_info.h"
|
||||||
|
#include <sys/auxv.h>
|
||||||
|
#include <asm/hwcap.h>
|
||||||
|
#include <fstream>
|
||||||
|
#include "src/common/log_adapter.h"
|
||||||
|
#include "nnacl/nnacl_utils.h"
|
||||||
|
|
||||||
|
namespace mindspore::lite {
|
||||||
|
uint32_t CpuInfo::MidrSetPart(uint32_t part) {
|
||||||
|
return (midr_ & ~CPUINFO_ARM_MIDR_PART_MASK) | ((part << CPUINFO_ARM_MIDR_PART_OFFSET) & CPUINFO_ARM_MIDR_PART_MASK);
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t CpuInfo::MidrSetImplementer(uint32_t implementer) {
|
||||||
|
return (midr_ & ~CPUINFO_ARM_MIDR_IMPLEMENTER_MASK) |
|
||||||
|
((implementer << CPUINFO_ARM_MIDR_IMPLEMENTER_OFFSET) & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Converts a "0x"-prefixed hexadecimal string to its numeric value.
// Returns 0 when the prefix is not exactly "0x" (lowercase) or when any character after the
// prefix is not a hex digit. Upper- and lowercase digits A-F / a-f are both accepted.
// No overflow check is done: digits beyond 32 bits are shifted out of the result.
uint32_t CpuInfo::StringToDigit(const std::string &str) {
  // Reject anything that does not start with the hex prefix (also covers strings shorter than 2).
  if (str.compare(0, 2, "0x") != 0) {
    return 0;
  }
  uint32_t value = 0;
  for (size_t pos = 2; pos < str.length(); ++pos) {
    const char ch = str[pos];
    uint32_t nibble = 0;
    if (ch >= '0' && ch <= '9') {
      nibble = static_cast<uint32_t>(ch - '0');
    } else if (ch >= 'A' && ch <= 'F') {
      nibble = 10 + static_cast<uint32_t>(ch - 'A');
    } else if (ch >= 'a' && ch <= 'f') {
      nibble = 10 + static_cast<uint32_t>(ch - 'a');
    } else {
      // A single invalid character invalidates the whole string.
      return 0;
    }
    // Append the next hex digit as the lowest four bits.
    value = (value << 4) | nibble;
  }
  return value;
}
|
||||||
|
|
||||||
|
uint32_t CpuInfo::ParseArmCpuPart(const std::string &cpu_part) {
|
||||||
|
// cpu_part string length is in [3, 5]
|
||||||
|
auto cpu_part_length = cpu_part.length();
|
||||||
|
if (cpu_part_length < 3 || cpu_part_length > 5) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return StringToDigit(cpu_part);
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t CpuInfo::ParseArmCpuImplementer(const std::string &str) {
|
||||||
|
auto str_length = str.length();
|
||||||
|
switch (str_length) {
|
||||||
|
case 3:
|
||||||
|
case 4:
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return StringToDigit(str);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Only get hardware and midr now*/
|
||||||
|
void CpuInfo::GetArmProcCpuInfo(AndroidCpuInfo *android_cpu_info) {
|
||||||
|
std::ifstream infile("/proc/cpuinfo", std::ios::in);
|
||||||
|
std::string line;
|
||||||
|
while (getline(infile, line)) {
|
||||||
|
for (unsigned int i = 0; i < line.length(); ++i) {
|
||||||
|
if (line[i] == ':') {
|
||||||
|
std::string prefix = line.substr(0, i);
|
||||||
|
prefix.erase(0, prefix.find_first_not_of(' '));
|
||||||
|
prefix.erase(prefix.find_last_not_of('\t') + 1);
|
||||||
|
std::string suffix = line.substr(i + 2);
|
||||||
|
if (prefix == "CPU implementer" && android_cpu_info->cpu_implementer == 0) {
|
||||||
|
android_cpu_info->cpu_implementer = ParseArmCpuImplementer(suffix);
|
||||||
|
} else if (prefix == "CPU part" && android_cpu_info->cpu_part == 0) {
|
||||||
|
android_cpu_info->cpu_part = ParseArmCpuPart(suffix);
|
||||||
|
} else if (prefix == "Hardware" && android_cpu_info->hardware.empty()) {
|
||||||
|
android_cpu_info->hardware = suffix;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
infile.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool CpuInfo::ArmIsSupportFp16() {
|
||||||
|
#ifdef ENABLE_ARM32
|
||||||
|
GetArmProcCpuInfo(&android_cpu_info_);
|
||||||
|
midr_ = MidrSetPart(android_cpu_info_.cpu_part);
|
||||||
|
midr_ = MidrSetImplementer(android_cpu_info_.cpu_implementer);
|
||||||
|
switch (midr_ & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) {
|
||||||
|
case UINT32_C(0x4100D050): /* Cortex-A55 */
|
||||||
|
case UINT32_C(0x4100D060): /* Cortex-A65 */
|
||||||
|
case UINT32_C(0x4100D0B0): /* Cortex-A76 */
|
||||||
|
case UINT32_C(0x4100D0C0): /* Neoverse N1 */
|
||||||
|
case UINT32_C(0x4100D0D0): /* Cortex-A77 */
|
||||||
|
case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
|
||||||
|
case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
|
||||||
|
case UINT32_C(0x51008020): /* Kryo 385 Gold (Cortex-A75) */
|
||||||
|
case UINT32_C(0x51008030): /* Kryo 385 Silver (Cortex-A55) */
|
||||||
|
case UINT32_C(0x51008040): /* Kryo 485 Gold (Cortex-A76) */
|
||||||
|
case UINT32_C(0x51008050): /* Kryo 485 Silver (Cortex-A55) */
|
||||||
|
case UINT32_C(0x53000030): /* Exynos M4 */
|
||||||
|
case UINT32_C(0x53000040): /* Exynos M5 */
|
||||||
|
fp16_flag_ = true;
|
||||||
|
}
|
||||||
|
#elif defined(ENABLE_ARM64)
|
||||||
|
int hwcap_type = 16;
|
||||||
|
uint32_t hwcap = getHwCap(hwcap_type);
|
||||||
|
if (hwcap & HWCAP_FPHP) {
|
||||||
|
MS_LOG(DEBUG) << "Hw cap support FP16, hwcap: 0x" << hwcap;
|
||||||
|
fp16_flag_ = true;
|
||||||
|
} else {
|
||||||
|
MS_LOG(DEBUG) << "Hw cap NOT support FP16, hwcap: 0x" << hwcap;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return fp16_flag_;
|
||||||
|
}
|
||||||
|
} // namespace mindspore::lite
|
||||||
|
#endif
|
|
@ -0,0 +1,52 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
#ifdef ENABLE_ARM
|
||||||
|
#include <string>
|
||||||
|
#ifndef MINDSPORE_LITE_SRC_CPU_INFO_H
|
||||||
|
#define MINDSPORE_LITE_SRC_CPU_INFO_H
|
||||||
|
namespace mindspore::lite {
|
||||||
|
#define CPUINFO_HARDWARE_VALUE_MAX 64
/* As per include/sys/system_properties.h in Android NDK */

// Layout of the fields CpuInfo packs into its MIDR value:
// implementer code in bits [31:24], part number in bits [15:4].
#define CPUINFO_ARM_MIDR_IMPLEMENTER_MASK UINT32_C(0xFF000000)
#define CPUINFO_ARM_MIDR_PART_MASK UINT32_C(0x0000FFF0)
#define CPUINFO_ARM_MIDR_IMPLEMENTER_OFFSET 24
#define CPUINFO_ARM_MIDR_PART_OFFSET 4

// Values collected from /proc/cpuinfo. A numeric field equal to 0 and an empty string both mean
// "not read yet".
struct AndroidCpuInfo {
  uint32_t cpu_implementer = 0;  // parsed "CPU implementer" entry
  uint32_t cpu_part = 0;         // parsed "CPU part" entry
  std::string hardware;          // raw "Hardware" entry
};
|
||||||
|
|
||||||
|
class CpuInfo {
|
||||||
|
public:
|
||||||
|
CpuInfo() = default;
|
||||||
|
virtual ~CpuInfo() = default;
|
||||||
|
void GetArmProcCpuInfo(AndroidCpuInfo *android_cpu_info);
|
||||||
|
uint32_t ParseArmCpuImplementer(const std::string &suffix);
|
||||||
|
uint32_t ParseArmCpuPart(const std::string &suffix);
|
||||||
|
uint32_t MidrSetPart(uint32_t part);
|
||||||
|
uint32_t MidrSetImplementer(uint32_t implementer);
|
||||||
|
bool ArmIsSupportFp16();
|
||||||
|
uint32_t StringToDigit(const std::string &str);
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool fp16_flag_ = false;
|
||||||
|
uint32_t midr_ = 0;
|
||||||
|
AndroidCpuInfo android_cpu_info_;
|
||||||
|
};
|
||||||
|
} // namespace mindspore::lite
|
||||||
|
#endif // MINDSPORE_LITE_SRC_CPU_INFO_H
|
||||||
|
#endif
|
|
@ -13,11 +13,6 @@
|
||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifdef __ANDROID__
|
|
||||||
#include <sys/auxv.h>
|
|
||||||
#include <asm/hwcap.h>
|
|
||||||
#endif
|
|
||||||
#include "src/inner_context.h"
|
#include "src/inner_context.h"
|
||||||
#include "include/errorcode.h"
|
#include "include/errorcode.h"
|
||||||
#include "src/common/log_adapter.h"
|
#include "src/common/log_adapter.h"
|
||||||
|
@ -38,6 +33,10 @@ InnerContext::InnerContext(const Context *context) {
|
||||||
for (auto &device_ctx : context->device_list_) {
|
for (auto &device_ctx : context->device_list_) {
|
||||||
this->device_list_.push_back(device_ctx);
|
this->device_list_.push_back(device_ctx);
|
||||||
}
|
}
|
||||||
|
#ifdef ENABLE_ARM
|
||||||
|
cpu_info_ = new CpuInfo;
|
||||||
|
fp16_flag_ = cpu_info_->ArmIsSupportFp16();
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#if SUPPORT_NPU
|
#if SUPPORT_NPU
|
||||||
|
@ -60,6 +59,10 @@ InnerContext::InnerContext(const Context *context, NPUManager *npu_manager) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
this->npu_manager_ = npu_manager;
|
this->npu_manager_ = npu_manager;
|
||||||
|
#ifdef ENABLE_ARM
|
||||||
|
cpu_info_ = new CpuInfo;
|
||||||
|
fp16_flag_ = cpu_info_->ArmIsSupportFp16();
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -111,6 +114,9 @@ InnerContext::~InnerContext() {
|
||||||
free(this->thread_pool_);
|
free(this->thread_pool_);
|
||||||
this->thread_pool_ = nullptr;
|
this->thread_pool_ = nullptr;
|
||||||
}
|
}
|
||||||
|
#ifdef ENABLE_ARM
|
||||||
|
delete cpu_info_;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
int InnerContext::IsValid() const {
|
int InnerContext::IsValid() const {
|
||||||
|
@ -250,23 +256,5 @@ NpuDeviceInfo InnerContext::GetNpuInfo() const {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Support CPU backend to judge whether it supports Float16.
|
// Support CPU backend to judge whether it supports Float16.
|
||||||
bool InnerContext::IsSupportFloat16() const {
|
bool InnerContext::IsSupportFloat16() const { return fp16_flag_; }
|
||||||
bool status = false;
|
|
||||||
|
|
||||||
#if defined(ENABLE_ARM64)
|
|
||||||
#if defined(__ANDROID__)
|
|
||||||
int hwcap_type = 16;
|
|
||||||
uint32_t hwcap = getHwCap(hwcap_type);
|
|
||||||
if (hwcap & HWCAP_FPHP) {
|
|
||||||
MS_LOG(DEBUG) << "Hw cap support FP16, hwcap: 0x" << hwcap;
|
|
||||||
status = true;
|
|
||||||
} else {
|
|
||||||
MS_LOG(DEBUG) << "Hw cap NOT support FP16, hwcap: 0x" << hwcap;
|
|
||||||
status = false;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
return status;
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace mindspore::lite
|
} // namespace mindspore::lite
|
||||||
|
|
|
@ -21,6 +21,9 @@
|
||||||
#include "include/context.h"
|
#include "include/context.h"
|
||||||
#include "src/runtime/runtime_api.h"
|
#include "src/runtime/runtime_api.h"
|
||||||
#include "src/runtime/allocator.h"
|
#include "src/runtime/allocator.h"
|
||||||
|
#ifdef ENABLE_ARM
|
||||||
|
#include "src/cpu_info.h"
|
||||||
|
#endif
|
||||||
#ifdef SUPPORT_NPU
|
#ifdef SUPPORT_NPU
|
||||||
#include "src/runtime/agent/npu/npu_manager.h"
|
#include "src/runtime/agent/npu/npu_manager.h"
|
||||||
#endif
|
#endif
|
||||||
|
@ -72,9 +75,13 @@ struct InnerContext : public Context {
|
||||||
|
|
||||||
bool IsSupportFloat16() const;
|
bool IsSupportFloat16() const;
|
||||||
|
|
||||||
#if SUPPORT_NPU
|
bool fp16_flag_ = false;
|
||||||
|
|
||||||
private:
|
#ifdef ENABLE_ARM
|
||||||
|
CpuInfo *cpu_info_ = nullptr;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if SUPPORT_NPU
|
||||||
NPUManager *npu_manager_ = nullptr;
|
NPUManager *npu_manager_ = nullptr;
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
|
@ -197,7 +197,7 @@ int WeightDecoder::DequantWeight(lite::Tensor *input_tensor, bool channel_first,
|
||||||
input_tensor->set_own_data(true);
|
input_tensor->set_own_data(true);
|
||||||
input_tensor->set_data_type(dst_data_type);
|
input_tensor->set_data_type(dst_data_type);
|
||||||
} else if (input_tensor->data_type() == kNumberTypeInt16 && dst_data_type == kNumberTypeFloat16) {
|
} else if (input_tensor->data_type() == kNumberTypeInt16 && dst_data_type == kNumberTypeFloat16) {
|
||||||
#if defined(ENABLE_ARM64) && defined(ENABLE_FP16)
|
#if defined(ENABLE_ARM) && defined(ENABLE_FP16)
|
||||||
auto new_const_data = DequantData<int16_t, float16_t>(input_tensor, channel_first);
|
auto new_const_data = DequantData<int16_t, float16_t>(input_tensor, channel_first);
|
||||||
input_tensor->set_data(new_const_data);
|
input_tensor->set_data(new_const_data);
|
||||||
input_tensor->set_own_data(true);
|
input_tensor->set_own_data(true);
|
||||||
|
@ -212,7 +212,7 @@ int WeightDecoder::DequantWeight(lite::Tensor *input_tensor, bool channel_first,
|
||||||
input_tensor->set_own_data(true);
|
input_tensor->set_own_data(true);
|
||||||
input_tensor->set_data_type(dst_data_type);
|
input_tensor->set_data_type(dst_data_type);
|
||||||
} else if (input_tensor->data_type() == kNumberTypeInt8 && dst_data_type == kNumberTypeFloat16) {
|
} else if (input_tensor->data_type() == kNumberTypeInt8 && dst_data_type == kNumberTypeFloat16) {
|
||||||
#if defined(ENABLE_ARM64) && defined(ENABLE_FP16)
|
#if defined(ENABLE_ARM) && defined(ENABLE_FP16)
|
||||||
auto new_const_data = DequantData<int8_t, float16_t>(input_tensor, channel_first);
|
auto new_const_data = DequantData<int8_t, float16_t>(input_tensor, channel_first);
|
||||||
input_tensor->set_data(new_const_data);
|
input_tensor->set_data(new_const_data);
|
||||||
input_tensor->set_own_data(true);
|
input_tensor->set_own_data(true);
|
||||||
|
|
|
@ -171,8 +171,12 @@ class WeightDecoder {
|
||||||
if (!channel_first) {
|
if (!channel_first) {
|
||||||
index = channels * j + i;
|
index = channels * j + i;
|
||||||
}
|
}
|
||||||
auto dequant_data = (quant_datas[index] - zero_point) * scale;
|
#ifdef ENABLE_ARM32
|
||||||
dequant_datas[index] = static_cast<DT>(dequant_data * var_corr + mean_corr);
|
volatile float dequant_data = (quant_datas[index] - zero_point) * scale * var_corr + mean_corr;
|
||||||
|
dequant_datas[index] = static_cast<DT>(dequant_data);
|
||||||
|
#else
|
||||||
|
dequant_datas[index] = static_cast<DT>((quant_datas[index] - zero_point) * scale * var_corr + mean_corr);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -190,7 +194,12 @@ class WeightDecoder {
|
||||||
}
|
}
|
||||||
dequant_datas[j] = static_cast<DT>(param.clusters[index - INT8_MIN]);
|
dequant_datas[j] = static_cast<DT>(param.clusters[index - INT8_MIN]);
|
||||||
} else {
|
} else {
|
||||||
|
#ifdef ENABLE_ARM32
|
||||||
|
volatile float dequant_data = (quant_datas[j] - zero_point) * scale;
|
||||||
|
dequant_datas[j] = static_cast<DT>(dequant_data);
|
||||||
|
#else
|
||||||
dequant_datas[j] = static_cast<DT>((quant_datas[j] - zero_point) * scale);
|
dequant_datas[j] = static_cast<DT>((quant_datas[j] - zero_point) * scale);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -160,6 +160,7 @@ set(TEST_LITE_SRC
|
||||||
${LITE_DIR}/src/common/string_util.cc
|
${LITE_DIR}/src/common/string_util.cc
|
||||||
${LITE_DIR}/src/common/quant_utils.cc
|
${LITE_DIR}/src/common/quant_utils.cc
|
||||||
${LITE_DIR}/src/errorcode.cc
|
${LITE_DIR}/src/errorcode.cc
|
||||||
|
${LITE_DIR}/src/cpu_info.cc
|
||||||
)
|
)
|
||||||
|
|
||||||
if(ENABLE_TOOLS)
|
if(ENABLE_TOOLS)
|
||||||
|
|
|
@ -1899,7 +1899,9 @@ function Run_arm32() {
|
||||||
if [ -f ${arm32_path}/mindspore-lite-${version}-inference-android-aarch32/inference/lib/libminddata-lite.so ]; then
|
if [ -f ${arm32_path}/mindspore-lite-${version}-inference-android-aarch32/inference/lib/libminddata-lite.so ]; then
|
||||||
cp -a ${arm32_path}/mindspore-lite-${version}-inference-android-aarch32/inference/lib/libminddata-lite.so ${benchmark_test_path}/libminddata-lite.so || exit 1
|
cp -a ${arm32_path}/mindspore-lite-${version}-inference-android-aarch32/inference/lib/libminddata-lite.so ${benchmark_test_path}/libminddata-lite.so || exit 1
|
||||||
fi
|
fi
|
||||||
|
cp -a ${arm32_path}/mindspore-lite-${version}-inference-android-aarch32/inference/third_party/hiai_ddk/lib/libhiai.so ${benchmark_test_path}/libhiai.so || exit 1
|
||||||
|
cp -a ${arm32_path}/mindspore-lite-${version}-inference-android-aarch32/inference/third_party/hiai_ddk/lib/libhiai_ir.so ${benchmark_test_path}/libhiai_ir.so || exit 1
|
||||||
|
cp -a ${arm32_path}/mindspore-lite-${version}-inference-android-aarch32/inference/third_party/hiai_ddk/lib/libhiai_ir_build.so ${benchmark_test_path}/libhiai_ir_build.so || exit 1
|
||||||
cp -a ${arm32_path}/mindspore-lite-${version}-inference-android-aarch32/inference/lib/libmindspore-lite.so ${benchmark_test_path}/libmindspore-lite.so || exit 1
|
cp -a ${arm32_path}/mindspore-lite-${version}-inference-android-aarch32/inference/lib/libmindspore-lite.so ${benchmark_test_path}/libmindspore-lite.so || exit 1
|
||||||
cp -a ${arm32_path}/mindspore-lite-${version}-inference-android-aarch32/inference/lib/libmslite_kernel_reg.so ${benchmark_test_path}/libmslite_kernel_reg.so || exit 1
|
cp -a ${arm32_path}/mindspore-lite-${version}-inference-android-aarch32/inference/lib/libmslite_kernel_reg.so ${benchmark_test_path}/libmslite_kernel_reg.so || exit 1
|
||||||
cp -a ${arm32_path}/mindspore-lite-${version}-inference-android-aarch32/tools/benchmark/benchmark ${benchmark_test_path}/benchmark || exit 1
|
cp -a ${arm32_path}/mindspore-lite-${version}-inference-android-aarch32/tools/benchmark/benchmark ${benchmark_test_path}/benchmark || exit 1
|
||||||
|
@ -2127,7 +2129,9 @@ function Run_armv82_a32_fp16() {
|
||||||
if [ -f ${armv82_path}/mindspore-lite-${version}-inference-android-aarch32/inference/minddata/lib/libminddata-lite.so ]; then
|
if [ -f ${armv82_path}/mindspore-lite-${version}-inference-android-aarch32/inference/minddata/lib/libminddata-lite.so ]; then
|
||||||
cp -a ${armv82_path}/mindspore-lite-${version}-inference-android-aarch32/inference/minddata/lib/libminddata-lite.so ${benchmark_test_path}/libminddata-lite.so || exit 1
|
cp -a ${armv82_path}/mindspore-lite-${version}-inference-android-aarch32/inference/minddata/lib/libminddata-lite.so ${benchmark_test_path}/libminddata-lite.so || exit 1
|
||||||
fi
|
fi
|
||||||
|
cp -a ${armv82_path}/mindspore-lite-${version}-inference-android-aarch32/inference/third_party/hiai_ddk/lib/libhiai.so ${benchmark_test_path}/libhiai.so || exit 1
|
||||||
|
cp -a ${armv82_path}/mindspore-lite-${version}-inference-android-aarch32/inference/third_party/hiai_ddk/lib/libhiai_ir.so ${benchmark_test_path}/libhiai_ir.so || exit 1
|
||||||
|
cp -a ${armv82_path}/mindspore-lite-${version}-inference-android-aarch32/inference/third_party/hiai_ddk/lib/libhiai_ir_build.so ${benchmark_test_path}/libhiai_ir_build.so || exit 1
|
||||||
cp -a ${armv82_path}/mindspore-lite-${version}-inference-android-aarch32/inference/lib/libmindspore-lite.so ${benchmark_test_path}/libmindspore-lite.so || exit 1
|
cp -a ${armv82_path}/mindspore-lite-${version}-inference-android-aarch32/inference/lib/libmindspore-lite.so ${benchmark_test_path}/libmindspore-lite.so || exit 1
|
||||||
cp -a ${armv82_path}/mindspore-lite-${version}-inference-android-aarch32/inference/lib/libmslite_kernel_reg.so ${benchmark_test_path}/libmslite_kernel_reg.so || exit 1
|
cp -a ${armv82_path}/mindspore-lite-${version}-inference-android-aarch32/inference/lib/libmslite_kernel_reg.so ${benchmark_test_path}/libmslite_kernel_reg.so || exit 1
|
||||||
cp -a ${armv82_path}/mindspore-lite-${version}-inference-android-aarch32/tools/benchmark/benchmark ${benchmark_test_path}/benchmark || exit 1
|
cp -a ${armv82_path}/mindspore-lite-${version}-inference-android-aarch32/tools/benchmark/benchmark ${benchmark_test_path}/benchmark || exit 1
|
||||||
|
|
|
@ -149,7 +149,7 @@ function Run_arm() {
|
||||||
cp -a ${arm_path}/mindspore-lite-${version_arm}-train-android-${process_unit}/train/third_party/libjpeg-turbo/lib/libturbojpeg.so* ${benchmark_train_test_path}/ || exit 1
|
cp -a ${arm_path}/mindspore-lite-${version_arm}-train-android-${process_unit}/train/third_party/libjpeg-turbo/lib/libturbojpeg.so* ${benchmark_train_test_path}/ || exit 1
|
||||||
cp -a ${arm_path}/mindspore-lite-${version_arm}-train-android-${process_unit}/train/lib/libminddata-lite.so ${benchmark_train_test_path}/libminddata-lite.so || exit 1
|
cp -a ${arm_path}/mindspore-lite-${version_arm}-train-android-${process_unit}/train/lib/libminddata-lite.so ${benchmark_train_test_path}/libminddata-lite.so || exit 1
|
||||||
fi
|
fi
|
||||||
if [ "$1" == arm64 ]; then
|
if [ "$1" == arm64 ] || [ "$1" == arm32 ]; then
|
||||||
cp -a ${arm_path}/mindspore-lite-${version_arm}-train-android-${process_unit}/train/third_party/hiai_ddk/lib/libhiai.so ${benchmark_train_test_path}/libhiai.so || exit 1
|
cp -a ${arm_path}/mindspore-lite-${version_arm}-train-android-${process_unit}/train/third_party/hiai_ddk/lib/libhiai.so ${benchmark_train_test_path}/libhiai.so || exit 1
|
||||||
cp -a ${arm_path}/mindspore-lite-${version_arm}-train-android-${process_unit}/train/third_party/hiai_ddk/lib/libhiai_ir.so ${benchmark_train_test_path}/libhiai_ir.so || exit 1
|
cp -a ${arm_path}/mindspore-lite-${version_arm}-train-android-${process_unit}/train/third_party/hiai_ddk/lib/libhiai_ir.so ${benchmark_train_test_path}/libhiai_ir.so || exit 1
|
||||||
cp -a ${arm_path}/mindspore-lite-${version_arm}-train-android-${process_unit}/train/third_party/hiai_ddk/lib/libhiai_ir_build.so ${benchmark_train_test_path}/libhiai_ir_build.so || exit 1
|
cp -a ${arm_path}/mindspore-lite-${version_arm}-train-android-${process_unit}/train/third_party/hiai_ddk/lib/libhiai_ir_build.so ${benchmark_train_test_path}/libhiai_ir_build.so || exit 1
|
||||||
|
|
Loading…
Reference in New Issue