forked from mindspore-Ecosystem/mindspore
!8336 [MS][LITE][CPU SSE] add x86_sse in run_benchmark_nets.sh
From: @lzkcode Reviewed-by: @hangangqiang,@zhang_xue_tong Signed-off-by: @zhang_xue_tong
This commit is contained in:
commit
7d250f2218
|
@ -68,7 +68,7 @@ void PostConvFuncFp32C4(const float *c4_out_ptr, float *out_ptr, const float *bi
|
|||
return;
|
||||
}
|
||||
|
||||
#ifndef ENABLE_ARM
|
||||
#if !defined(ENABLE_ARM) && !defined(ENABLE_X86_64_SSE)
|
||||
void WinogradTransLeft(const float *S, const float *B, float *M, size_t w, size_t h, size_t k, size_t length) {
|
||||
const int unitStep = 4 * length;
|
||||
for (int y = 0; y < h; ++y) {
|
||||
|
|
|
@ -39,6 +39,13 @@ float ShortToFloat32(uint16_t src_value);
|
|||
|
||||
uint16_t Float32ToShort(float src_value);
|
||||
|
||||
#ifdef ENABLE_X86_64_SSE
|
||||
void PostFuncBiasReluC8(float *dst, const float *src, const float *bias, size_t oc8div, size_t oc8mod,
|
||||
size_t plane_size, size_t stride, size_t relu_type);
|
||||
void PostFuncBiasReluC4(float *dst, const float *src, const float *bias, size_t oc4div, size_t oc4mod,
|
||||
size_t plane_size, size_t plane_stride, size_t relu_type);
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_ARM
|
||||
void ConvDwFp32Center(float *dst, const float *src, const float *weight, const float *bias, size_t height, size_t width,
|
||||
size_t kernel_h, size_t kernel_w, size_t out_h_step, size_t block_channel, size_t in_sh_step,
|
||||
|
|
|
@ -55,10 +55,10 @@ void MatrixMultiplyWinograd(const float *matix_a, const float *matrix_b, float *
|
|||
src1_j += in_channel;
|
||||
src2_y += n;
|
||||
}
|
||||
_mm_store_ps(matrix_c, dst1);
|
||||
_mm_store_ps(matrix_c + 4, dst2);
|
||||
_mm_store_ps(matrix_c + 8, dst3);
|
||||
_mm_store_ps(matrix_c + 12, dst4);
|
||||
_mm_storeu_ps(matrix_c, dst1);
|
||||
_mm_storeu_ps(matrix_c + 4, dst2);
|
||||
_mm_storeu_ps(matrix_c + 8, dst3);
|
||||
_mm_storeu_ps(matrix_c + 12, dst4);
|
||||
src1_j -= in_channel * k;
|
||||
src1_j += C16NUM;
|
||||
matrix_c += C16NUM;
|
||||
|
@ -80,8 +80,8 @@ void MatrixMultiplyWinograd(const float *matix_a, const float *matrix_b, float *
|
|||
src1_j += in_channel;
|
||||
src2_y += n;
|
||||
}
|
||||
_mm_store_ps(matrix_c, dst1);
|
||||
_mm_store_ps(matrix_c + 4, dst2);
|
||||
_mm_storeu_ps(matrix_c, dst1);
|
||||
_mm_storeu_ps(matrix_c + 4, dst2);
|
||||
src1_j -= in_channel * k;
|
||||
src1_j += C8NUM;
|
||||
matrix_c += C8NUM;
|
||||
|
@ -185,26 +185,26 @@ void MatmulFloatSse64Opt(const float *a, const float *b, float *c, const float *
|
|||
}
|
||||
if (write_mode == 2) { // WriteWino
|
||||
c = dst + WinoSteps2;
|
||||
_mm_store_ps(dst, dst1);
|
||||
_mm_store_ps(dst + 4, dst2);
|
||||
_mm_storeu_ps(dst, dst1);
|
||||
_mm_storeu_ps(dst + 4, dst2);
|
||||
dst += WinoSteps1;
|
||||
_mm_store_ps(dst, dst3);
|
||||
_mm_store_ps(dst + 4, dst4);
|
||||
_mm_storeu_ps(dst, dst3);
|
||||
_mm_storeu_ps(dst + 4, dst4);
|
||||
dst += WinoSteps1;
|
||||
_mm_store_ps(dst, dst5);
|
||||
_mm_store_ps(dst + 4, dst6);
|
||||
_mm_storeu_ps(dst, dst5);
|
||||
_mm_storeu_ps(dst + 4, dst6);
|
||||
dst += WinoSteps1;
|
||||
_mm_store_ps(dst, dst7);
|
||||
_mm_store_ps(dst + 4, dst8);
|
||||
_mm_storeu_ps(dst, dst7);
|
||||
_mm_storeu_ps(dst + 4, dst8);
|
||||
} else if (write_mode == 0) { // WriteC8
|
||||
_mm_store_ps(c, dst1);
|
||||
_mm_store_ps(c + 4, dst2);
|
||||
_mm_store_ps(c + 8, dst3);
|
||||
_mm_store_ps(c + 12, dst4);
|
||||
_mm_store_ps(c + 16, dst5);
|
||||
_mm_store_ps(c + 20, dst6);
|
||||
_mm_store_ps(c + 24, dst7);
|
||||
_mm_store_ps(c + 28, dst8);
|
||||
_mm_storeu_ps(c, dst1);
|
||||
_mm_storeu_ps(c + 4, dst2);
|
||||
_mm_storeu_ps(c + 8, dst3);
|
||||
_mm_storeu_ps(c + 12, dst4);
|
||||
_mm_storeu_ps(c + 16, dst5);
|
||||
_mm_storeu_ps(c + 20, dst6);
|
||||
_mm_storeu_ps(c + 24, dst7);
|
||||
_mm_storeu_ps(c + 28, dst8);
|
||||
c += C8Steps;
|
||||
} else {
|
||||
switch (cc) {
|
||||
|
@ -288,39 +288,39 @@ void MatmulFloatSse64Opt(const float *a, const float *b, float *c, const float *
|
|||
break;
|
||||
case 4: // write4
|
||||
c = dst + 4;
|
||||
_mm_store_ps(dst, dst1);
|
||||
_mm_storeu_ps(dst, dst1);
|
||||
if (r > 1) {
|
||||
dst += stride;
|
||||
_mm_store_ps(dst, dst3);
|
||||
_mm_storeu_ps(dst, dst3);
|
||||
}
|
||||
if (r > 2) {
|
||||
dst += stride;
|
||||
_mm_store_ps(dst, dst5);
|
||||
_mm_storeu_ps(dst, dst5);
|
||||
}
|
||||
if (r > 3) {
|
||||
dst += stride;
|
||||
_mm_store_ps(dst, dst7);
|
||||
_mm_storeu_ps(dst, dst7);
|
||||
dst += stride;
|
||||
dst += 4;
|
||||
}
|
||||
break;
|
||||
case 5: // write5
|
||||
c = dst + 5;
|
||||
_mm_store_ps(dst, dst1);
|
||||
_mm_storeu_ps(dst, dst1);
|
||||
_mm_store_ss(dst + 4, dst2);
|
||||
if (r > 1) {
|
||||
dst += stride;
|
||||
_mm_store_ps(dst, dst3);
|
||||
_mm_storeu_ps(dst, dst3);
|
||||
_mm_store_ss(dst + 4, dst4);
|
||||
}
|
||||
if (r > 2) {
|
||||
dst += stride;
|
||||
_mm_store_ps(dst, dst5);
|
||||
_mm_storeu_ps(dst, dst5);
|
||||
_mm_store_ss(dst + 4, dst6);
|
||||
}
|
||||
if (r > 3) {
|
||||
dst += stride;
|
||||
_mm_store_ps(dst, dst7);
|
||||
_mm_storeu_ps(dst, dst7);
|
||||
_mm_store_ss(dst + 4, dst8);
|
||||
dst += stride;
|
||||
dst += 5;
|
||||
|
@ -328,27 +328,27 @@ void MatmulFloatSse64Opt(const float *a, const float *b, float *c, const float *
|
|||
break;
|
||||
case 6: // write6
|
||||
c = dst + 6;
|
||||
_mm_store_ps(dst, dst1);
|
||||
_mm_storeu_ps(dst, dst1);
|
||||
_mm_store_ss(dst + 4, dst2);
|
||||
dst2 = _mm_shuffle_ps(dst2, dst2, _MM_SHUFFLE(0, 3, 2, 1));
|
||||
_mm_store_ss(dst + 5, dst2);
|
||||
if (r > 1) {
|
||||
dst += stride;
|
||||
_mm_store_ps(dst, dst3);
|
||||
_mm_storeu_ps(dst, dst3);
|
||||
_mm_store_ss(dst + 4, dst4);
|
||||
dst4 = _mm_shuffle_ps(dst4, dst4, _MM_SHUFFLE(0, 3, 2, 1));
|
||||
_mm_store_ss(dst + 5, dst4);
|
||||
}
|
||||
if (r > 2) {
|
||||
dst += stride;
|
||||
_mm_store_ps(dst, dst5);
|
||||
_mm_storeu_ps(dst, dst5);
|
||||
_mm_store_ss(dst + 4, dst6);
|
||||
dst6 = _mm_shuffle_ps(dst6, dst6, _MM_SHUFFLE(0, 3, 2, 1));
|
||||
_mm_store_ss(dst + 5, dst6);
|
||||
}
|
||||
if (r > 3) {
|
||||
dst += stride;
|
||||
_mm_store_ps(dst, dst7);
|
||||
_mm_storeu_ps(dst, dst7);
|
||||
_mm_store_ss(dst + 4, dst8);
|
||||
dst8 = _mm_shuffle_ps(dst8, dst8, _MM_SHUFFLE(0, 3, 2, 1));
|
||||
_mm_store_ss(dst + 5, dst8);
|
||||
|
@ -358,7 +358,7 @@ void MatmulFloatSse64Opt(const float *a, const float *b, float *c, const float *
|
|||
break;
|
||||
case 7: // write7
|
||||
c = dst + 7;
|
||||
_mm_store_ps(dst, dst1);
|
||||
_mm_storeu_ps(dst, dst1);
|
||||
_mm_store_ss(dst + 4, dst2);
|
||||
dst2 = _mm_shuffle_ps(dst2, dst2, _MM_SHUFFLE(0, 3, 2, 1));
|
||||
_mm_store_ss(dst + 5, dst2);
|
||||
|
@ -366,7 +366,7 @@ void MatmulFloatSse64Opt(const float *a, const float *b, float *c, const float *
|
|||
_mm_store_ss(dst + 6, dst2);
|
||||
if (r > 1) {
|
||||
dst += stride;
|
||||
_mm_store_ps(dst, dst3);
|
||||
_mm_storeu_ps(dst, dst3);
|
||||
_mm_store_ss(dst + 4, dst4);
|
||||
dst4 = _mm_shuffle_ps(dst4, dst4, _MM_SHUFFLE(0, 3, 2, 1));
|
||||
_mm_store_ss(dst + 5, dst4);
|
||||
|
@ -375,7 +375,7 @@ void MatmulFloatSse64Opt(const float *a, const float *b, float *c, const float *
|
|||
}
|
||||
if (r > 2) {
|
||||
dst += stride;
|
||||
_mm_store_ps(dst, dst5);
|
||||
_mm_storeu_ps(dst, dst5);
|
||||
_mm_store_ss(dst + 4, dst6);
|
||||
dst6 = _mm_shuffle_ps(dst6, dst6, _MM_SHUFFLE(0, 3, 2, 1));
|
||||
_mm_store_ss(dst + 5, dst6);
|
||||
|
@ -384,7 +384,7 @@ void MatmulFloatSse64Opt(const float *a, const float *b, float *c, const float *
|
|||
}
|
||||
if (r > 3) {
|
||||
dst += stride;
|
||||
_mm_store_ps(dst, dst7);
|
||||
_mm_storeu_ps(dst, dst7);
|
||||
_mm_store_ss(dst + 4, dst8);
|
||||
dst8 = _mm_shuffle_ps(dst8, dst8, _MM_SHUFFLE(0, 3, 2, 1));
|
||||
_mm_store_ss(dst + 5, dst8);
|
||||
|
@ -396,22 +396,22 @@ void MatmulFloatSse64Opt(const float *a, const float *b, float *c, const float *
|
|||
break;
|
||||
default: // write8
|
||||
c = dst + C8NUM;
|
||||
_mm_store_ps(dst, dst1);
|
||||
_mm_store_ps(dst + 4, dst2);
|
||||
_mm_storeu_ps(dst, dst1);
|
||||
_mm_storeu_ps(dst + 4, dst2);
|
||||
if (r > 1) {
|
||||
dst += stride;
|
||||
_mm_store_ps(dst, dst3);
|
||||
_mm_store_ps(dst + 4, dst4);
|
||||
_mm_storeu_ps(dst, dst3);
|
||||
_mm_storeu_ps(dst + 4, dst4);
|
||||
}
|
||||
if (r > 2) {
|
||||
dst += stride;
|
||||
_mm_store_ps(dst, dst5);
|
||||
_mm_store_ps(dst + 4, dst6);
|
||||
_mm_storeu_ps(dst, dst5);
|
||||
_mm_storeu_ps(dst + 4, dst6);
|
||||
}
|
||||
if (r > 3) {
|
||||
dst += stride;
|
||||
_mm_store_ps(dst, dst7);
|
||||
_mm_store_ps(dst + 4, dst8);
|
||||
_mm_storeu_ps(dst, dst7);
|
||||
_mm_storeu_ps(dst + 4, dst8);
|
||||
dst += stride;
|
||||
dst += C8NUM;
|
||||
}
|
||||
|
@ -518,27 +518,27 @@ void MatmulFloatSse64(const float *a, const float *b, float *c, const float *bia
|
|||
dst8 = _mm_max_ps(dst8, zero);
|
||||
}
|
||||
if (WriteWino != 0) { // WriteWino
|
||||
_mm_store_ps(dst, dst1);
|
||||
_mm_store_ps(dst + 4, dst2);
|
||||
_mm_storeu_ps(dst, dst1);
|
||||
_mm_storeu_ps(dst + 4, dst2);
|
||||
dst += WriteWinoSteps;
|
||||
_mm_store_ps(dst, dst3);
|
||||
_mm_store_ps(dst + 4, dst4);
|
||||
_mm_storeu_ps(dst, dst3);
|
||||
_mm_storeu_ps(dst + 4, dst4);
|
||||
dst += WriteWinoSteps;
|
||||
_mm_store_ps(dst, dst5);
|
||||
_mm_store_ps(dst + 4, dst6);
|
||||
_mm_storeu_ps(dst, dst5);
|
||||
_mm_storeu_ps(dst + 4, dst6);
|
||||
dst += WriteWinoSteps;
|
||||
_mm_store_ps(dst, dst7);
|
||||
_mm_store_ps(dst + 4, dst8);
|
||||
_mm_storeu_ps(dst, dst7);
|
||||
_mm_storeu_ps(dst + 4, dst8);
|
||||
dst += WriteWinoSteps;
|
||||
} else if (writeNhwc == 0) { // WriteC8
|
||||
_mm_store_ps(dst, dst1);
|
||||
_mm_store_ps(dst + 4, dst2);
|
||||
_mm_store_ps(dst + 8, dst3);
|
||||
_mm_store_ps(dst + 12, dst4);
|
||||
_mm_store_ps(dst + 16, dst5);
|
||||
_mm_store_ps(dst + 20, dst6);
|
||||
_mm_store_ps(dst + 24, dst7);
|
||||
_mm_store_ps(dst + 28, dst8);
|
||||
_mm_storeu_ps(dst, dst1);
|
||||
_mm_storeu_ps(dst + 4, dst2);
|
||||
_mm_storeu_ps(dst + 8, dst3);
|
||||
_mm_storeu_ps(dst + 12, dst4);
|
||||
_mm_storeu_ps(dst + 16, dst5);
|
||||
_mm_storeu_ps(dst + 20, dst6);
|
||||
_mm_storeu_ps(dst + 24, dst7);
|
||||
_mm_storeu_ps(dst + 28, dst8);
|
||||
dst += 32;
|
||||
c = dst;
|
||||
} else {
|
||||
|
@ -612,68 +612,68 @@ void MatmulFloatSse64(const float *a, const float *b, float *c, const float *bia
|
|||
dst += stride;
|
||||
}
|
||||
case 4: // write4
|
||||
_mm_store_ps(dst, dst1);
|
||||
_mm_storeu_ps(dst, dst1);
|
||||
if (r > 1) {
|
||||
dst += stride;
|
||||
_mm_store_ps(dst, dst3);
|
||||
_mm_storeu_ps(dst, dst3);
|
||||
}
|
||||
if (r > 2) {
|
||||
dst += stride;
|
||||
_mm_store_ps(dst, dst5);
|
||||
_mm_storeu_ps(dst, dst5);
|
||||
}
|
||||
if (r > 3) {
|
||||
dst += stride;
|
||||
_mm_store_ps(dst, dst7);
|
||||
_mm_storeu_ps(dst, dst7);
|
||||
dst += stride;
|
||||
}
|
||||
case 5: // // write5
|
||||
_mm_store_ps(dst, dst1);
|
||||
_mm_storeu_ps(dst, dst1);
|
||||
_mm_store_ss(dst + 4, dst2);
|
||||
if (r > 1) {
|
||||
dst += stride;
|
||||
_mm_store_ps(dst, dst3);
|
||||
_mm_storeu_ps(dst, dst3);
|
||||
_mm_store_ss(dst + 4, dst4);
|
||||
}
|
||||
if (r > 2) {
|
||||
dst += stride;
|
||||
_mm_store_ps(dst, dst5);
|
||||
_mm_storeu_ps(dst, dst5);
|
||||
_mm_store_ss(dst + 4, dst6);
|
||||
}
|
||||
if (r > 3) {
|
||||
dst += stride;
|
||||
_mm_store_ps(dst, dst7);
|
||||
_mm_storeu_ps(dst, dst7);
|
||||
_mm_store_ss(dst + 4, dst8);
|
||||
dst += stride;
|
||||
}
|
||||
case 6: // write6
|
||||
_mm_store_ps(dst, dst1);
|
||||
_mm_storeu_ps(dst, dst1);
|
||||
_mm_store_ss(dst + 4, dst2);
|
||||
dst2 = _mm_shuffle_ps(dst2, dst2, _MM_SHUFFLE(0, 3, 2, 1));
|
||||
_mm_store_ss(dst + 5, dst2);
|
||||
if (r > 1) {
|
||||
dst += stride;
|
||||
_mm_store_ps(dst, dst3);
|
||||
_mm_storeu_ps(dst, dst3);
|
||||
_mm_store_ss(dst + 4, dst4);
|
||||
dst4 = _mm_shuffle_ps(dst4, dst4, _MM_SHUFFLE(0, 3, 2, 1));
|
||||
_mm_store_ss(dst + 5, dst4);
|
||||
}
|
||||
if (r > 2) {
|
||||
dst += stride;
|
||||
_mm_store_ps(dst, dst5);
|
||||
_mm_storeu_ps(dst, dst5);
|
||||
_mm_store_ss(dst + 4, dst6);
|
||||
dst6 = _mm_shuffle_ps(dst6, dst6, _MM_SHUFFLE(0, 3, 2, 1));
|
||||
_mm_store_ss(dst + 5, dst6);
|
||||
}
|
||||
if (r > 3) {
|
||||
dst += stride;
|
||||
_mm_store_ps(dst, dst7);
|
||||
_mm_storeu_ps(dst, dst7);
|
||||
_mm_store_ss(dst + 4, dst8);
|
||||
dst8 = _mm_shuffle_ps(dst8, dst8, _MM_SHUFFLE(0, 3, 2, 1));
|
||||
_mm_store_ss(dst + 5, dst8);
|
||||
dst += stride;
|
||||
}
|
||||
case 7: // write7
|
||||
_mm_store_ps(dst, dst1);
|
||||
_mm_storeu_ps(dst, dst1);
|
||||
_mm_store_ss(dst + 4, dst2);
|
||||
dst2 = _mm_shuffle_ps(dst2, dst2, _MM_SHUFFLE(0, 3, 2, 1));
|
||||
_mm_store_ss(dst + 5, dst2);
|
||||
|
@ -681,7 +681,7 @@ void MatmulFloatSse64(const float *a, const float *b, float *c, const float *bia
|
|||
_mm_store_ss(dst + 6, dst2);
|
||||
if (r > 1) {
|
||||
dst += stride;
|
||||
_mm_store_ps(dst, dst3);
|
||||
_mm_storeu_ps(dst, dst3);
|
||||
_mm_store_ss(dst + 4, dst4);
|
||||
dst4 = _mm_shuffle_ps(dst4, dst4, _MM_SHUFFLE(0, 3, 2, 1));
|
||||
_mm_store_ss(dst + 5, dst4);
|
||||
|
@ -690,7 +690,7 @@ void MatmulFloatSse64(const float *a, const float *b, float *c, const float *bia
|
|||
}
|
||||
if (r > 2) {
|
||||
dst += stride;
|
||||
_mm_store_ps(dst, dst5);
|
||||
_mm_storeu_ps(dst, dst5);
|
||||
_mm_store_ss(dst + 4, dst6);
|
||||
dst6 = _mm_shuffle_ps(dst6, dst6, _MM_SHUFFLE(0, 3, 2, 1));
|
||||
_mm_store_ss(dst + 5, dst6);
|
||||
|
@ -699,7 +699,7 @@ void MatmulFloatSse64(const float *a, const float *b, float *c, const float *bia
|
|||
}
|
||||
if (r > 3) {
|
||||
dst += stride;
|
||||
_mm_store_ps(dst, dst7);
|
||||
_mm_storeu_ps(dst, dst7);
|
||||
_mm_store_ss(dst + 4, dst8);
|
||||
dst8 = _mm_shuffle_ps(dst8, dst8, _MM_SHUFFLE(0, 3, 2, 1));
|
||||
_mm_store_ss(dst + 5, dst8);
|
||||
|
@ -708,22 +708,22 @@ void MatmulFloatSse64(const float *a, const float *b, float *c, const float *bia
|
|||
dst += stride;
|
||||
}
|
||||
default: // write8
|
||||
_mm_store_ps(dst, dst1);
|
||||
_mm_store_ps(dst + 4, dst2);
|
||||
_mm_storeu_ps(dst, dst1);
|
||||
_mm_storeu_ps(dst + 4, dst2);
|
||||
if (r > 1) {
|
||||
dst += stride;
|
||||
_mm_store_ps(dst, dst3);
|
||||
_mm_store_ps(dst + 4, dst4);
|
||||
_mm_storeu_ps(dst, dst3);
|
||||
_mm_storeu_ps(dst + 4, dst4);
|
||||
}
|
||||
if (r > 2) {
|
||||
dst += stride;
|
||||
_mm_store_ps(dst, dst5);
|
||||
_mm_store_ps(dst + 4, dst6);
|
||||
_mm_storeu_ps(dst, dst5);
|
||||
_mm_storeu_ps(dst + 4, dst6);
|
||||
}
|
||||
if (r > 3) {
|
||||
dst += stride;
|
||||
_mm_store_ps(dst, dst7);
|
||||
_mm_store_ps(dst + 4, dst8);
|
||||
_mm_storeu_ps(dst, dst7);
|
||||
_mm_storeu_ps(dst + 4, dst8);
|
||||
dst += stride;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,173 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifdef ENABLE_X86_64_SSE
|
||||
#include <nmmintrin.h>
|
||||
#include "nnacl/fp32/common_func.h"
|
||||
|
||||
void PostFuncBiasReluC8(float *dst, const float *src, const float *bias, size_t oc8div, size_t oc8mod,
|
||||
size_t plane_size, size_t stride, size_t relu_type) {
|
||||
__m128 relu6 = _mm_set_ps1(6.0);
|
||||
__m128 zero = _mm_setzero_ps();
|
||||
stride /= sizeof(float);
|
||||
for (int loop_c8 = 0; !(loop_c8 == oc8div); loop_c8 += C8NUM) {
|
||||
size_t plane_size_tmp = plane_size;
|
||||
float *dst_c8 = dst + loop_c8;
|
||||
__m128 bias1 = _mm_setzero_ps();
|
||||
__m128 bias2 = _mm_setzero_ps();
|
||||
if (bias != NULL) {
|
||||
bias1 = _mm_loadu_ps(bias);
|
||||
bias2 = _mm_loadu_ps(bias + 4);
|
||||
bias += 8;
|
||||
}
|
||||
for (; plane_size_tmp >= C4NUM; plane_size_tmp -= C4NUM) {
|
||||
__m128 src1 = _mm_loadu_ps(src);
|
||||
__m128 src2 = _mm_loadu_ps(src + 4);
|
||||
__m128 src3 = _mm_loadu_ps(src + 8);
|
||||
__m128 src4 = _mm_loadu_ps(src + 12);
|
||||
__m128 src5 = _mm_loadu_ps(src + 16);
|
||||
__m128 src6 = _mm_loadu_ps(src + 20);
|
||||
__m128 src7 = _mm_loadu_ps(src + 24);
|
||||
__m128 src8 = _mm_loadu_ps(src + 28);
|
||||
src += 32;
|
||||
src1 = _mm_add_ps(src1, bias1);
|
||||
src2 = _mm_add_ps(src2, bias2);
|
||||
src3 = _mm_add_ps(src3, bias1);
|
||||
src4 = _mm_add_ps(src4, bias2);
|
||||
src5 = _mm_add_ps(src5, bias1);
|
||||
src6 = _mm_add_ps(src6, bias2);
|
||||
src7 = _mm_add_ps(src7, bias1);
|
||||
src8 = _mm_add_ps(src8, bias2);
|
||||
switch (relu_type) {
|
||||
case 3:
|
||||
src1 = _mm_min_ps(src1, relu6);
|
||||
src2 = _mm_min_ps(src2, relu6);
|
||||
src3 = _mm_min_ps(src3, relu6);
|
||||
src4 = _mm_min_ps(src4, relu6);
|
||||
src5 = _mm_min_ps(src5, relu6);
|
||||
src6 = _mm_min_ps(src6, relu6);
|
||||
src7 = _mm_min_ps(src7, relu6);
|
||||
src8 = _mm_min_ps(src8, relu6);
|
||||
case 1:
|
||||
src1 = _mm_max_ps(src1, zero);
|
||||
src2 = _mm_max_ps(src2, zero);
|
||||
src3 = _mm_max_ps(src3, zero);
|
||||
src4 = _mm_max_ps(src4, zero);
|
||||
src5 = _mm_max_ps(src5, zero);
|
||||
src6 = _mm_max_ps(src6, zero);
|
||||
src7 = _mm_max_ps(src7, zero);
|
||||
src8 = _mm_max_ps(src8, zero);
|
||||
break;
|
||||
}
|
||||
_mm_storeu_ps(dst_c8, src1);
|
||||
_mm_storeu_ps(dst_c8 + 4, src2);
|
||||
dst_c8 += stride;
|
||||
_mm_storeu_ps(dst_c8, src3);
|
||||
_mm_storeu_ps(dst_c8 + 4, src4);
|
||||
dst_c8 += stride;
|
||||
_mm_storeu_ps(dst_c8, src5);
|
||||
_mm_storeu_ps(dst_c8 + 4, src6);
|
||||
dst_c8 += stride;
|
||||
_mm_storeu_ps(dst_c8, src7);
|
||||
_mm_storeu_ps(dst_c8 + 4, src8);
|
||||
dst_c8 += stride;
|
||||
}
|
||||
for (; plane_size_tmp > 0; plane_size_tmp -= 1) {
|
||||
__m128 src1 = _mm_loadu_ps(src);
|
||||
__m128 src2 = _mm_loadu_ps(src + 4);
|
||||
src1 = _mm_add_ps(src1, bias1);
|
||||
src2 = _mm_add_ps(src2, bias2);
|
||||
switch (relu_type) {
|
||||
case 3:
|
||||
src1 = _mm_min_ps(src1, relu6);
|
||||
src2 = _mm_min_ps(src2, relu6);
|
||||
case 1:
|
||||
src1 = _mm_max_ps(src1, zero);
|
||||
src2 = _mm_max_ps(src2, zero);
|
||||
break;
|
||||
}
|
||||
_mm_storeu_ps(dst_c8, src1);
|
||||
_mm_storeu_ps(dst_c8 + 4, src2);
|
||||
dst_c8 += stride;
|
||||
src += 8;
|
||||
}
|
||||
}
|
||||
if (oc8mod == 0) {
|
||||
return;
|
||||
}
|
||||
__m128 bias1 = _mm_setzero_ps();
|
||||
__m128 bias2 = _mm_setzero_ps();
|
||||
if (bias != NULL) {
|
||||
bias1 = _mm_loadu_ps(bias);
|
||||
bias2 = _mm_loadu_ps(bias + 4);
|
||||
bias += 8;
|
||||
}
|
||||
float *dst_c1 = dst + oc8div;
|
||||
for (size_t plane_size_tmp = plane_size; plane_size_tmp > 0; plane_size_tmp -= 1) {
|
||||
__m128 src1 = _mm_loadu_ps(src);
|
||||
__m128 src2 = _mm_loadu_ps(src + 4);
|
||||
src += 8;
|
||||
src1 = _mm_add_ps(src1, bias1);
|
||||
src2 = _mm_add_ps(src2, bias2);
|
||||
switch (relu_type) {
|
||||
case 3:
|
||||
src1 = _mm_min_ps(src1, relu6);
|
||||
src2 = _mm_min_ps(src2, relu6);
|
||||
case 1:
|
||||
src1 = _mm_max_ps(src1, zero);
|
||||
src2 = _mm_max_ps(src2, zero);
|
||||
break;
|
||||
}
|
||||
switch (oc8mod) {
|
||||
case 1:
|
||||
_mm_store_ss(dst_c1, src1);
|
||||
dst_c1 += stride;
|
||||
break;
|
||||
case 2:
|
||||
_mm_storel_pi((__m64 *)(dst_c1), src1);
|
||||
dst_c1 += stride;
|
||||
break;
|
||||
case 3:
|
||||
_mm_storel_pi((__m64 *)(dst_c1), src1);
|
||||
src1 = _mm_unpackhi_ps(src1, src1);
|
||||
_mm_store_ss(dst_c1 + 2, src1);
|
||||
dst_c1 += stride;
|
||||
break;
|
||||
case 4:
|
||||
_mm_storeu_ps(dst_c1, src1);
|
||||
dst_c1 += stride;
|
||||
break;
|
||||
case 5:
|
||||
_mm_storeu_ps(dst_c1, src1);
|
||||
_mm_store_ss(dst_c1 + 4, src2);
|
||||
dst_c1 += stride;
|
||||
break;
|
||||
case 6:
|
||||
_mm_storeu_ps(dst_c1, src1);
|
||||
_mm_storel_pi((__m64 *)(dst_c1 + 4), src2);
|
||||
dst_c1 += stride;
|
||||
break;
|
||||
case 7:
|
||||
_mm_storeu_ps(dst_c1, src1);
|
||||
_mm_storel_pi((__m64 *)(dst_c1 + 4), src2);
|
||||
src2 = _mm_unpackhi_ps(src2, src2);
|
||||
_mm_store_ss(dst_c1 + 6, src2);
|
||||
dst_c1 += stride;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
|
@ -0,0 +1,258 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifdef ENABLE_X86_64_SSE
|
||||
#include <nmmintrin.h>
|
||||
#include "nnacl/fp32/common_func.h"
|
||||
|
||||
/*
 * SSE version of the left-hand Winograd transform.
 *
 * With tile = 4 * length floats, S is read as k slices of w tiles and M is
 * written as h rows of w tiles.  For every output row y and column x:
 *
 *   M[y][x][:] = sum over i in [0, k) of B[i * h + y] * S[i][x][:]
 *
 * The k dimension is consumed in groups of 7, then 4, then 3, then 1.  Inside
 * a group the products are summed in two interleaved partial sums ("main"
 * starts from the current output tile, "side" starts from the second term)
 * that are combined just before the store.
 */
void WinogradTransLeft(const float *S, const float *B, float *M, size_t w, size_t h, size_t k, size_t length) {
  const size_t tile = 4 * length; /* floats per tile */
  const size_t slice = tile * w;  /* floats between two consecutive k-slices of S */

  for (size_t y = 0; y < h; ++y) {
    for (size_t x = 0; x < w; ++x) {
      float *out = M + (y * w + x) * tile;
      const float *in = S + x * tile;
      const float *coef = B + y;
      size_t remaining = k;

      memset(out, 0, tile * sizeof(float));

      /* groups of seven coefficients */
      while (remaining >= 7) {
        const __m128 c0 = _mm_load_ps1(coef);
        const __m128 c1 = _mm_load_ps1(coef + h);
        const __m128 c2 = _mm_load_ps1(coef + 2 * h);
        const __m128 c3 = _mm_load_ps1(coef + 3 * h);
        const __m128 c4 = _mm_load_ps1(coef + 4 * h);
        const __m128 c5 = _mm_load_ps1(coef + 5 * h);
        const __m128 c6 = _mm_load_ps1(coef + 6 * h);
        for (size_t off = 0; off < tile; off += 4) {
          const float *p = in + off;
          __m128 main_sum = _mm_loadu_ps(out + off);
          main_sum = _mm_add_ps(main_sum, _mm_mul_ps(_mm_loadu_ps(p), c0));
          __m128 side_sum = _mm_mul_ps(_mm_loadu_ps(p + slice), c1);
          main_sum = _mm_add_ps(main_sum, _mm_mul_ps(_mm_loadu_ps(p + 2 * slice), c2));
          side_sum = _mm_add_ps(side_sum, _mm_mul_ps(_mm_loadu_ps(p + 3 * slice), c3));
          main_sum = _mm_add_ps(main_sum, _mm_mul_ps(_mm_loadu_ps(p + 4 * slice), c4));
          side_sum = _mm_add_ps(side_sum, _mm_mul_ps(_mm_loadu_ps(p + 5 * slice), c5));
          main_sum = _mm_add_ps(main_sum, _mm_mul_ps(_mm_loadu_ps(p + 6 * slice), c6));
          _mm_storeu_ps(out + off, _mm_add_ps(main_sum, side_sum));
        }
        coef += 7 * h;
        in += 7 * slice;
        remaining -= 7;
      }

      /* groups of four coefficients */
      while (remaining >= 4) {
        const __m128 c0 = _mm_load_ps1(coef);
        const __m128 c1 = _mm_load_ps1(coef + h);
        const __m128 c2 = _mm_load_ps1(coef + 2 * h);
        const __m128 c3 = _mm_load_ps1(coef + 3 * h);
        for (size_t off = 0; off < tile; off += 4) {
          const float *p = in + off;
          __m128 main_sum = _mm_loadu_ps(out + off);
          main_sum = _mm_add_ps(main_sum, _mm_mul_ps(_mm_loadu_ps(p), c0));
          __m128 side_sum = _mm_mul_ps(_mm_loadu_ps(p + slice), c1);
          main_sum = _mm_add_ps(main_sum, _mm_mul_ps(_mm_loadu_ps(p + 2 * slice), c2));
          side_sum = _mm_add_ps(side_sum, _mm_mul_ps(_mm_loadu_ps(p + 3 * slice), c3));
          _mm_storeu_ps(out + off, _mm_add_ps(main_sum, side_sum));
        }
        coef += 4 * h;
        in += 4 * slice;
        remaining -= 4;
      }

      /* groups of three coefficients */
      while (remaining >= 3) {
        const __m128 c0 = _mm_load_ps1(coef);
        const __m128 c1 = _mm_load_ps1(coef + h);
        const __m128 c2 = _mm_load_ps1(coef + 2 * h);
        for (size_t off = 0; off < tile; off += 4) {
          const float *p = in + off;
          __m128 main_sum = _mm_loadu_ps(out + off);
          main_sum = _mm_add_ps(main_sum, _mm_mul_ps(_mm_loadu_ps(p), c0));
          __m128 side_sum = _mm_mul_ps(_mm_loadu_ps(p + slice), c1);
          main_sum = _mm_add_ps(main_sum, _mm_mul_ps(_mm_loadu_ps(p + 2 * slice), c2));
          _mm_storeu_ps(out + off, _mm_add_ps(main_sum, side_sum));
        }
        coef += 3 * h;
        in += 3 * slice;
        remaining -= 3;
      }

      /* leftover coefficients, one at a time */
      while (remaining > 0) {
        const __m128 c0 = _mm_load_ps1(coef);
        for (size_t off = 0; off < tile; off += 4) {
          __m128 main_sum = _mm_loadu_ps(out + off);
          main_sum = _mm_add_ps(main_sum, _mm_mul_ps(_mm_loadu_ps(in + off), c0));
          _mm_storeu_ps(out + off, main_sum);
        }
        coef += h;
        in += slice;
        remaining -= 1;
      }
    }
  }
}
|
||||
|
||||
/*
 * SSE version of the right-hand Winograd transform.
 *
 * With tile = 4 * length floats, S is read as h rows of k tiles and M is
 * written as h rows of w tiles.  For every output row y and column x:
 *
 *   M[y][x][:] = sum over i in [0, k) of B[i * h + x] * S[y][i][:]
 *
 * The k dimension is consumed in groups of 7, then 4, then 3, then 1.  Inside
 * a group the products are summed in two interleaved partial sums ("main"
 * starts from the current output tile, "side" starts from the second term)
 * that are combined just before the store.
 */
void WinogradTransRight(const float *S, const float *B, float *M, size_t w, size_t h, size_t k, size_t length) {
  const size_t tile = 4 * length;   /* floats per tile */
  const size_t row_len = tile * k;  /* floats per input row of S */

  for (size_t y = 0; y < h; ++y) {
    const float *row = S + y * row_len;
    for (size_t x = 0; x < w; ++x) {
      float *out = M + (y * w + x) * tile;
      const float *in = row;
      const float *coef = B + x;
      size_t remaining = k;

      memset(out, 0, tile * sizeof(float));

      /* groups of seven coefficients */
      while (remaining >= 7) {
        const __m128 c0 = _mm_load_ps1(coef);
        const __m128 c1 = _mm_load_ps1(coef + h);
        const __m128 c2 = _mm_load_ps1(coef + 2 * h);
        const __m128 c3 = _mm_load_ps1(coef + 3 * h);
        const __m128 c4 = _mm_load_ps1(coef + 4 * h);
        const __m128 c5 = _mm_load_ps1(coef + 5 * h);
        const __m128 c6 = _mm_load_ps1(coef + 6 * h);
        for (size_t off = 0; off < tile; off += 4) {
          const float *p = in + off;
          __m128 main_sum = _mm_loadu_ps(out + off);
          main_sum = _mm_add_ps(main_sum, _mm_mul_ps(_mm_loadu_ps(p), c0));
          __m128 side_sum = _mm_mul_ps(_mm_loadu_ps(p + tile), c1);
          main_sum = _mm_add_ps(main_sum, _mm_mul_ps(_mm_loadu_ps(p + 2 * tile), c2));
          side_sum = _mm_add_ps(side_sum, _mm_mul_ps(_mm_loadu_ps(p + 3 * tile), c3));
          main_sum = _mm_add_ps(main_sum, _mm_mul_ps(_mm_loadu_ps(p + 4 * tile), c4));
          side_sum = _mm_add_ps(side_sum, _mm_mul_ps(_mm_loadu_ps(p + 5 * tile), c5));
          main_sum = _mm_add_ps(main_sum, _mm_mul_ps(_mm_loadu_ps(p + 6 * tile), c6));
          _mm_storeu_ps(out + off, _mm_add_ps(main_sum, side_sum));
        }
        coef += 7 * h;
        in += 7 * tile;
        remaining -= 7;
      }

      /* groups of four coefficients */
      while (remaining >= 4) {
        const __m128 c0 = _mm_load_ps1(coef);
        const __m128 c1 = _mm_load_ps1(coef + h);
        const __m128 c2 = _mm_load_ps1(coef + 2 * h);
        const __m128 c3 = _mm_load_ps1(coef + 3 * h);
        for (size_t off = 0; off < tile; off += 4) {
          const float *p = in + off;
          __m128 main_sum = _mm_loadu_ps(out + off);
          main_sum = _mm_add_ps(main_sum, _mm_mul_ps(_mm_loadu_ps(p), c0));
          __m128 side_sum = _mm_mul_ps(_mm_loadu_ps(p + tile), c1);
          main_sum = _mm_add_ps(main_sum, _mm_mul_ps(_mm_loadu_ps(p + 2 * tile), c2));
          side_sum = _mm_add_ps(side_sum, _mm_mul_ps(_mm_loadu_ps(p + 3 * tile), c3));
          _mm_storeu_ps(out + off, _mm_add_ps(main_sum, side_sum));
        }
        coef += 4 * h;
        in += 4 * tile;
        remaining -= 4;
      }

      /* groups of three coefficients */
      while (remaining >= 3) {
        const __m128 c0 = _mm_load_ps1(coef);
        const __m128 c1 = _mm_load_ps1(coef + h);
        const __m128 c2 = _mm_load_ps1(coef + 2 * h);
        for (size_t off = 0; off < tile; off += 4) {
          const float *p = in + off;
          __m128 main_sum = _mm_loadu_ps(out + off);
          main_sum = _mm_add_ps(main_sum, _mm_mul_ps(_mm_loadu_ps(p), c0));
          __m128 side_sum = _mm_mul_ps(_mm_loadu_ps(p + tile), c1);
          main_sum = _mm_add_ps(main_sum, _mm_mul_ps(_mm_loadu_ps(p + 2 * tile), c2));
          _mm_storeu_ps(out + off, _mm_add_ps(main_sum, side_sum));
        }
        coef += 3 * h;
        in += 3 * tile;
        remaining -= 3;
      }

      /* leftover coefficients, one at a time */
      while (remaining >= 1) {
        const __m128 c0 = _mm_load_ps1(coef);
        for (size_t off = 0; off < tile; off += 4) {
          __m128 main_sum = _mm_loadu_ps(out + off);
          main_sum = _mm_add_ps(main_sum, _mm_mul_ps(_mm_loadu_ps(in + off), c0));
          _mm_storeu_ps(out + off, main_sum);
        }
        coef += h;
        in += tile;
        remaining -= 1;
      }
    }
  }
}
|
||||
#endif
|
|
@ -5,6 +5,7 @@ function Run_Converter() {
|
|||
# Unzip x86 runtime and convertor
|
||||
cd ${x86_path} || exit 1
|
||||
tar -zxf mindspore-lite-${version}-runtime-x86-${process_unit_x86}.tar.gz || exit 1
|
||||
tar -zxf mindspore-lite-${version}-runtime-x86-sse-${process_unit_x86}.tar.gz || exit 1
|
||||
|
||||
tar -zxf mindspore-lite-${version}-converter-ubuntu.tar.gz || exit 1
|
||||
cd ${x86_path}/mindspore-lite-${version}-converter-ubuntu || exit 1
|
||||
|
@ -480,6 +481,234 @@ function Run_x86() {
|
|||
done < ${models_only_for_process_config}
|
||||
}
|
||||
|
||||
# Run on x86 sse platform:
|
||||
function Run_x86_sse() {
|
||||
# Run tflite converted models:
|
||||
while read line; do
|
||||
model_name=${line}
|
||||
if [[ $model_name == \#* ]]; then
|
||||
continue
|
||||
fi
|
||||
echo ${model_name} >> "${run_x86_sse_log_file}"
|
||||
# Record the directory change in the SSE run log (the redirect target needs the leading '$').
echo 'cd '${x86_path}'/mindspore-lite-'${version}'-runtime-x86-sse-'${process_unit_x86} >> "${run_x86_sse_log_file}"
|
||||
cd ${x86_path}/mindspore-lite-${version}-runtime-x86-sse-${process_unit_x86} || return 1
|
||||
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_sse_log_file}"
|
||||
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out >> "${run_x86_sse_log_file}"
|
||||
if [ $? = 0 ]; then
|
||||
run_result='x86_sse: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
|
||||
else
|
||||
run_result='x86_sse: '${model_name}' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
|
||||
fi
|
||||
done < ${models_tflite_config}
|
||||
|
||||
# Benchmark every caffe-converted model listed in ${models_caffe_config} with the x86-sse runtime.
while read line; do
    model_name=${line}
    # Entries that start with '#' are commented out in the config; skip them.
    [[ $model_name == \#* ]] && continue

    # Log the model name and the directory change before performing it.
    {
        echo ${model_name}
        echo 'cd '${x86_path}'/mindspore-lite-'${version}'-runtime-x86-sse-'${process_unit_x86}
    } >> "${run_x86_sse_log_file}"
    cd ${x86_path}/mindspore-lite-${version}-runtime-x86-sse-${process_unit_x86} || return 1

    # Record the command line in the log, then execute it.
    echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_sse_log_file}"
    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib
    ./benchmark/benchmark \
        --modelFile=${ms_models_path}/${model_name}.ms \
        --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin \
        --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out >> "${run_x86_sse_log_file}"
    # $? is the benchmark's exit status; return 1 stops at the first failing model.
    if [ $? != 0 ]; then
        run_result='x86_sse: '${model_name}' failed'
        echo ${run_result} >> ${run_benchmark_result_file}
        return 1
    fi
    run_result='x86_sse: '${model_name}' pass'
    echo ${run_result} >> ${run_benchmark_result_file}
done < ${models_caffe_config}
|
||||
|
||||
# Benchmark every onnx-converted model listed in ${models_onnx_config} with the x86-sse runtime.
# Each config line is split at its last ';': the part before it is the model name,
# the part after it is passed to the benchmark as --inputShapes.
while read line; do
    model_name=${line%;*}
    length=${#model_name}
    input_shapes=${line:length+1}
    # Entries that start with '#' are commented out in the config; skip them.
    [[ $model_name == \#* ]] && continue

    # Log the model name and the directory change before performing it.
    {
        echo ${model_name}
        echo 'cd '${x86_path}'/mindspore-lite-'${version}'-runtime-x86-sse-'${process_unit_x86}
    } >> "${run_x86_sse_log_file}"
    cd ${x86_path}/mindspore-lite-${version}-runtime-x86-sse-${process_unit_x86} || return 1

    # Record the command line in the log, then execute it.
    echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --inputShapes='${input_shapes}' --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_sse_log_file}"
    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib
    ./benchmark/benchmark \
        --modelFile=${ms_models_path}/${model_name}.ms \
        --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin \
        --inputShapes=${input_shapes} \
        --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out >> "${run_x86_sse_log_file}"
    # $? is the benchmark's exit status; return 1 stops at the first failing model.
    if [ $? != 0 ]; then
        run_result='x86_sse: '${model_name}' failed'
        echo ${run_result} >> ${run_benchmark_result_file}
        return 1
    fi
    run_result='x86_sse: '${model_name}' pass'
    echo ${run_result} >> ${run_benchmark_result_file}
done < ${models_onnx_config}
|
||||
|
||||
# Benchmark every tflite post-training-quantized model listed in
# ${models_tflite_posttraining_config} with the x86-sse runtime.
while read line; do
    model_name=${line}
    # Entries that start with '#' are commented out in the config; skip them.
    [[ $model_name == \#* ]] && continue

    # Log the model name and the directory change before performing it.
    {
        echo ${model_name}
        echo 'cd '${x86_path}'/mindspore-lite-'${version}'-runtime-x86-sse-'${process_unit_x86}
    } >> "${run_x86_sse_log_file}"
    cd ${x86_path}/mindspore-lite-${version}-runtime-x86-sse-${process_unit_x86} || return 1

    # Record the command line in the log, then execute it.
    echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'_posttraining.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/quantTraining/mnist_calibration_data/00099.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'_posttraining.ms.out' >> "${run_x86_sse_log_file}"
    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib
    ./benchmark/benchmark \
        --modelFile=${ms_models_path}/${model_name}_posttraining.ms \
        --inDataFile=/home/workspace/mindspore_dataset/mslite/quantTraining/mnist_calibration_data/00099.bin \
        --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}_posttraining.ms.out >> "${run_x86_sse_log_file}"
    # $? is the benchmark's exit status; return 1 stops at the first failing model.
    if [ $? != 0 ]; then
        run_result='x86_sse: '${model_name}' failed'
        echo ${run_result} >> ${run_benchmark_result_file}
        return 1
    fi
    run_result='x86_sse: '${model_name}' pass'
    echo ${run_result} >> ${run_benchmark_result_file}
done < ${models_tflite_posttraining_config}
|
||||
|
||||
# Benchmark every caffe post-training-quantized model listed in
# ${models_caffe_posttraining_config} with the x86-sse runtime (accuracy threshold 105).
while read line; do
    model_name=${line}
    # Entries that start with '#' are commented out in the config; skip them.
    [[ $model_name == \#* ]] && continue

    # Log the model name and the directory change before performing it.
    {
        echo ${model_name}
        echo 'cd '${x86_path}'/mindspore-lite-'${version}'-runtime-x86-sse-'${process_unit_x86}
    } >> "${run_x86_sse_log_file}"
    cd ${x86_path}/mindspore-lite-${version}-runtime-x86-sse-${process_unit_x86} || return 1

    # Record the command line in the log, then execute it.
    echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'_posttraining.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/quantTraining/ml_face_mnet_calibration_data/20_Family_Group_Family_Group_20_1001.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'_posttraining.ms.out' --accuracyThreshold=105 >> "${run_x86_sse_log_file}"
    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib
    ./benchmark/benchmark \
        --modelFile=${ms_models_path}/${model_name}_posttraining.ms \
        --inDataFile=/home/workspace/mindspore_dataset/mslite/quantTraining/ml_face_mnet_calibration_data/20_Family_Group_Family_Group_20_1001.bin \
        --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}_posttraining.ms.out \
        --accuracyThreshold=105 >> "${run_x86_sse_log_file}"
    # $? is the benchmark's exit status; return 1 stops at the first failing model.
    if [ $? != 0 ]; then
        run_result='x86_sse: '${model_name}' failed'
        echo ${run_result} >> ${run_benchmark_result_file}
        return 1
    fi
    run_result='x86_sse: '${model_name}' pass'
    echo ${run_result} >> ${run_benchmark_result_file}
done < ${models_caffe_posttraining_config}
|
||||
|
||||
# Benchmark every tflite aware-training-quantized model listed in
# ${models_tflite_awaretraining_config} with the x86-sse runtime.
while read line; do
    model_name=${line}
    # Entries that start with '#' are commented out in the config; skip them.
    [[ $model_name == \#* ]] && continue

    # Log the model name and the directory change before performing it.
    {
        echo ${model_name}
        echo 'cd '${x86_path}'/mindspore-lite-'${version}'-runtime-x86-sse-'${process_unit_x86}
    } >> "${run_x86_sse_log_file}"
    cd ${x86_path}/mindspore-lite-${version}-runtime-x86-sse-${process_unit_x86} || return 1

    # Record the command line in the log, then execute it.
    echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_sse_log_file}"
    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib
    ./benchmark/benchmark \
        --modelFile=${ms_models_path}/${model_name}.ms \
        --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin \
        --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out >> "${run_x86_sse_log_file}"
    # $? is the benchmark's exit status; return 1 stops at the first failing model.
    if [ $? != 0 ]; then
        run_result='x86_sse: '${model_name}' failed'
        echo ${run_result} >> ${run_benchmark_result_file}
        return 1
    fi
    run_result='x86_sse: '${model_name}' pass'
    echo ${run_result} >> ${run_benchmark_result_file}
done < ${models_tflite_awaretraining_config}
|
||||
|
||||
# Benchmark every mindspore-converted train model listed in ${models_mindspore_train_config}
# with the x86-sse runtime. The model file is <name>_train.ms and the run uses
# --accuracyThreshold=1.5.
while read line; do
    model_name=${line}
    # Entries that start with '#' are commented out in the config; skip them.
    [[ $model_name == \#* ]] && continue

    # Log the model name and the directory change before performing it.
    {
        echo ${model_name}'_train'
        echo 'cd '${x86_path}'/mindspore-lite-'${version}'-runtime-x86-sse-'${process_unit_x86}
    } >> "${run_x86_sse_log_file}"
    cd ${x86_path}/mindspore-lite-${version}-runtime-x86-sse-${process_unit_x86} || return 1

    # Record the command line in the log. The logged text now includes
    # --accuracyThreshold=1.5 so it matches the command that is actually executed.
    echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'_train.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.train.ms.out --accuracyThreshold=1.5' >> "${run_x86_sse_log_file}"
    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib
    ./benchmark/benchmark \
        --modelFile=${ms_models_path}/${model_name}'_train'.ms \
        --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin \
        --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.train.ms.out \
        --accuracyThreshold=1.5 >> "${run_x86_sse_log_file}"
    # $? is the benchmark's exit status; return 1 stops at the first failing model.
    if [ $? != 0 ]; then
        run_result='x86_sse: '${model_name}'_train failed'
        echo ${run_result} >> ${run_benchmark_result_file}
        return 1
    fi
    run_result='x86_sse: '${model_name}'_train pass'
    echo ${run_result} >> ${run_benchmark_result_file}
done < ${models_mindspore_train_config}
|
||||
|
||||
# Benchmark every mindspore-converted model listed in ${models_mindspore_config}
# with the x86-sse runtime, using --accuracyThreshold=1.5.
while read line; do
    model_name=${line}
    # Entries that start with '#' are commented out in the config; skip them.
    [[ $model_name == \#* ]] && continue

    # Log the model name and the directory change before performing it.
    {
        echo ${model_name}
        echo 'cd '${x86_path}'/mindspore-lite-'${version}'-runtime-x86-sse-'${process_unit_x86}
    } >> "${run_x86_sse_log_file}"
    cd ${x86_path}/mindspore-lite-${version}-runtime-x86-sse-${process_unit_x86} || return 1

    # Record the command line in the log. The logged text now includes
    # --accuracyThreshold=1.5 so it matches the command that is actually executed.
    echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out --accuracyThreshold=1.5' >> "${run_x86_sse_log_file}"
    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib
    ./benchmark/benchmark \
        --modelFile=${ms_models_path}/${model_name}.ms \
        --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin \
        --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out \
        --accuracyThreshold=1.5 >> "${run_x86_sse_log_file}"
    # $? is the benchmark's exit status; return 1 stops at the first failing model.
    if [ $? != 0 ]; then
        run_result='x86_sse: '${model_name}' failed'
        echo ${run_result} >> ${run_benchmark_result_file}
        return 1
    fi
    run_result='x86_sse: '${model_name}' pass'
    echo ${run_result} >> ${run_benchmark_result_file}
done < ${models_mindspore_config}
|
||||
|
||||
# Benchmark every tflite weight-quantized model listed in ${models_tflite_weightquant_config}
# with the x86-sse runtime. The model file is <name>_weightquant.ms; input and
# expected-output data are those of the unquantized <name>.ms model.
while read line; do
    model_name=${line}
    # Entries that start with '#' are commented out in the config; skip them.
    [[ $model_name == \#* ]] && continue

    # Log the model name and the directory change before performing it.
    {
        echo ${model_name}
        echo 'cd '${x86_path}'/mindspore-lite-'${version}'-runtime-x86-sse-'${process_unit_x86}
    } >> "${run_x86_sse_log_file}"
    cd ${x86_path}/mindspore-lite-${version}-runtime-x86-sse-${process_unit_x86} || return 1

    # Record the command line in the log. The logged --modelFile now names the
    # _weightquant.ms file that is actually executed (it previously said <name>.ms).
    echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_sse_log_file}"
    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib
    ./benchmark/benchmark \
        --modelFile=${ms_models_path}/${model_name}_weightquant.ms \
        --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin \
        --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out >> "${run_x86_sse_log_file}"
    # $? is the benchmark's exit status; return 1 stops at the first failing model.
    if [ $? != 0 ]; then
        run_result='x86_sse: '${model_name}' failed'
        echo ${run_result} >> ${run_benchmark_result_file}
        return 1
    fi
    run_result='x86_sse: '${model_name}' pass'
    echo ${run_result} >> ${run_benchmark_result_file}
done < ${models_tflite_weightquant_config}
|
||||
|
||||
# Benchmark every mindir weight-quantized model listed in ${models_mindspore_weightquant_config}
# with the x86-sse runtime. The model file is <name>_weightquant.ms and the
# expected output is <name>.weightquant.ms.out.
while read line; do
    model_name=${line}
    # Entries that start with '#' are commented out in the config; skip them.
    [[ $model_name == \#* ]] && continue

    # Log the model name and the directory change before performing it.
    {
        echo ${model_name}
        echo 'cd '${x86_path}'/mindspore-lite-'${version}'-runtime-x86-sse-'${process_unit_x86}
    } >> "${run_x86_sse_log_file}"
    cd ${x86_path}/mindspore-lite-${version}-runtime-x86-sse-${process_unit_x86} || return 1

    # Record the command line in the log. The logged --modelFile and --benchmarkDataFile
    # now name the files that are actually used (they previously said <name>.ms / <name>.ms.out).
    echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.weightquant.ms.out' >> "${run_x86_sse_log_file}"
    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib
    ./benchmark/benchmark \
        --modelFile=${ms_models_path}/${model_name}_weightquant.ms \
        --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin \
        --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.weightquant.ms.out >> "${run_x86_sse_log_file}"
    # $? is the benchmark's exit status; return 1 stops at the first failing model.
    if [ $? != 0 ]; then
        run_result='x86_sse: '${model_name}' failed'
        echo ${run_result} >> ${run_benchmark_result_file}
        return 1
    fi
    run_result='x86_sse: '${model_name}' pass'
    echo ${run_result} >> ${run_benchmark_result_file}
done < ${models_mindspore_weightquant_config}
|
||||
|
||||
# Benchmark every mindir mixed-bit weight-quantized model listed in
# ${models_mindspore_mixbit_config} with the x86-sse runtime. Each model is run
# twice: first its <name>_7bit.ms variant, then its <name>_9bit.ms variant.
while read line; do
    model_name=${line}
    # Entries that start with '#' are commented out in the config; skip them.
    [[ $model_name == \#* ]] && continue

    # Log the model name and the directory change before performing it.
    {
        echo ${model_name}
        echo 'cd '${x86_path}'/mindspore-lite-'${version}'-runtime-x86-sse-'${process_unit_x86}
    } >> "${run_x86_sse_log_file}"
    cd ${x86_path}/mindspore-lite-${version}-runtime-x86-sse-${process_unit_x86} || return 1

    # 7-bit variant: record the command line in the log, then execute it.
    echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'_7bit.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'_7bit.ms.out' >> "${run_x86_sse_log_file}"
    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib
    ./benchmark/benchmark \
        --modelFile=${ms_models_path}/${model_name}_7bit.ms \
        --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin \
        --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}_7bit.ms.out >> "${run_x86_sse_log_file}"
    # $? is the benchmark's exit status; return 1 stops at the first failing model.
    if [ $? != 0 ]; then
        run_result='x86_sse: '${model_name}'_7bit failed'
        echo ${run_result} >> ${run_benchmark_result_file}
        return 1
    fi
    run_result='x86_sse: '${model_name}'_7bit pass'
    echo ${run_result} >> ${run_benchmark_result_file}

    # 9-bit variant: same procedure, reached only when the 7-bit run passed.
    echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'_9bit.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'_9bit.ms.out' >> "${run_x86_sse_log_file}"
    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib
    ./benchmark/benchmark \
        --modelFile=${ms_models_path}/${model_name}_9bit.ms \
        --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin \
        --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}_9bit.ms.out >> "${run_x86_sse_log_file}"
    if [ $? != 0 ]; then
        run_result='x86_sse: '${model_name}'_9bit failed'
        echo ${run_result} >> ${run_benchmark_result_file}
        return 1
    fi
    run_result='x86_sse: '${model_name}'_9bit pass'
    echo ${run_result} >> ${run_benchmark_result_file}
done < ${models_mindspore_mixbit_config}
|
||||
|
||||
# Run every model listed in ${models_only_for_process_config} once on the x86-sse
# runtime (--loopCount=1, no warm-up, no input or reference data): only the
# benchmark's exit status is checked, not its accuracy.
while read line; do
    model_name=${line}
    # For '*.caffemodel' entries the model name is the entry without that extension.
    if [[ ${line##*.} == "caffemodel" ]]; then
        model_name=${line%.*}
    fi

    # Log the model name and the directory change before performing it.
    # Both lines go to ${run_x86_sse_log_file}; the 'cd' line was previously
    # redirected to "{run_x86_sse_log_file}" (missing '$'), which created a stray
    # file with that literal name instead of writing to the log.
    {
        echo ${model_name}
        echo 'cd '${x86_path}'/mindspore-lite-'${version}'-runtime-x86-sse-'${process_unit_x86}
    } >> "${run_x86_sse_log_file}"
    cd ${x86_path}/mindspore-lite-${version}-runtime-x86-sse-${process_unit_x86} || return 1

    # Record the command line in the log, then execute it.
    echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --loopCount=1 --warmUpLoopCount=0' >> "${run_x86_sse_log_file}"
    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib
    ./benchmark/benchmark \
        --modelFile=${ms_models_path}/${model_name}.ms \
        --loopCount=1 \
        --warmUpLoopCount=0 >> "${run_x86_sse_log_file}"
    # $? is the benchmark's exit status; return 1 stops at the first failing model.
    if [ $? != 0 ]; then
        run_result='x86_sse: '${model_name}' failed'
        echo ${run_result} >> ${run_benchmark_result_file}
        return 1
    fi
    run_result='x86_sse: '${model_name}' pass'
    echo ${run_result} >> ${run_benchmark_result_file}
done < ${models_only_for_process_config}
|
||||
}
|
||||
|
||||
# Run on arm64 platform:
|
||||
function Run_arm64() {
|
||||
# Unzip arm64
|
||||
|
@ -979,6 +1208,12 @@ IFS="-" read -r -a file_name_array <<< "$file_name"
|
|||
IFS="." read -r -a suffix <<< "${file_name_array[-1]}"
|
||||
process_unit_x86=${suffix[0]}
|
||||
|
||||
x86_path=${release_path}/ubuntu_x86
|
||||
file_name=$(ls ${x86_path}/*runtime-x86-sse*.tar.gz)
|
||||
IFS="-" read -r -a file_name_array <<< "$file_name"
|
||||
IFS="." read -r -a suffix <<< "${file_name_array[-1]}"
|
||||
process_unit_x86=${suffix[0]}
|
||||
|
||||
# Set models config filepath
|
||||
models_tflite_config=${basepath}/models_tflite.cfg
|
||||
models_caffe_config=${basepath}/models_caffe.cfg
|
||||
|
@ -1036,6 +1271,7 @@ else
|
|||
exit 1
|
||||
fi
|
||||
|
||||
|
||||
# Write benchmark result to temp file
|
||||
run_benchmark_result_file=${basepath}/run_benchmark_result.txt
|
||||
echo ' ' > ${run_benchmark_result_file}
|
||||
|
@ -1067,6 +1303,12 @@ Run_x86 &
|
|||
Run_x86_PID=$!
|
||||
sleep 1
|
||||
|
||||
# Run on x86-sse
|
||||
echo "start Run x86 sse ..."
|
||||
Run_x86_sse &
|
||||
Run_x86_sse_PID=$!
|
||||
sleep 1
|
||||
|
||||
# Run on arm64
|
||||
echo "start Run arm64 ..."
|
||||
Run_arm64
|
||||
|
@ -1099,6 +1341,16 @@ if [[ ${Run_x86_status} != 0 ]];then
|
|||
exit 1
|
||||
fi
|
||||
|
||||
wait ${Run_x86_sse_PID}
|
||||
Run_x86_sse_status=$?
|
||||
|
||||
if [[ ${Run_x86_sse_status} != 0 ]];then
|
||||
echo "Run_x86 sse failed"
|
||||
cat ${run_x86_sse_log_file}
|
||||
Print_Benchmark_Result
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ ${Run_arm64_status} != 0 ]];then
|
||||
echo "Run_arm64 failed"
|
||||
cat ${run_arm64_log_file}
|
||||
|
|
Loading…
Reference in New Issue