optimization for fp16 matmul kernel

lixian 2020-09-29 10:37:51 +08:00
parent 56c3d2219e
commit 869bffe976
3 changed files with 1319 additions and 1 deletions

File diff suppressed because it is too large


@@ -87,7 +87,11 @@ void MatMul16x8(const float16_t *a, const float16_t *b, float16_t *dst, const fl
 void MatMulFp16(const float16_t *a, const float16_t *b, float16_t *c, const float16_t *bias, ActType act_type,
                 int depth, int row, int col, int stride, bool write_nhwc) {
-  MatmulFp16Neon64(a, b, c, bias, (int)act_type, depth, row, col, stride, write_nhwc);
+  if (!write_nhwc) {
+    MatmulFp16Neon64(a, b, c, bias, (int)act_type, depth, row, col, stride, write_nhwc);
+  } else {
+    MatmulFp16Neon64Opt(a, b, c, bias, (int)act_type, depth, row, col, stride, 1);
+  }
   return;
 }
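
For orientation, a naive C reference of the product these NEON64 kernels compute: c = act(a * b + bias), with a of shape row x depth and b of shape depth x col. This is a minimal sketch assuming plain row-major operands and guessed ActType numbering; the real kernels consume pre-packed Col16/Row16 tiles and honor stride/write_nhwc, and MatMulFp16Ref is a hypothetical helper, not part of this commit.

#include <arm_neon.h>   /* float16_t on AArch64 */
#include <stddef.h>

/* Naive reference: c = act(a * b + bias). a: row x depth, b: depth x col,
 * both plain row-major here; the assembly kernels expect packed tiles. */
static void MatMulFp16Ref(const float16_t *a, const float16_t *b, float16_t *c,
                          const float16_t *bias, int act_type, int depth,
                          int row, int col) {
  for (int r = 0; r < row; ++r) {
    for (int j = 0; j < col; ++j) {
      /* fp32 accumulation for clarity; the assembly may accumulate in fp16. */
      float acc = (bias != NULL) ? (float)bias[j] : 0.0f;
      for (int d = 0; d < depth; ++d) {
        acc += (float)a[r * depth + d] * (float)b[d * col + j];
      }
      if (act_type == 1) {        /* assumed value for ActType_Relu */
        acc = acc > 0.0f ? acc : 0.0f;
      } else if (act_type == 3) { /* assumed value for ActType_Relu6 */
        acc = acc < 0.0f ? 0.0f : (acc > 6.0f ? 6.0f : acc);
      }
      c[r * col + j] = (float16_t)acc;
    }
  }
}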


@@ -39,6 +39,9 @@ void RowMajor2Col16MajorFp16Opt(float16_t *src_ptr, float16_t *dst_ptr, size_t r
 void MatmulFp16Neon64(const float16_t *a, const float16_t *b, float16_t *c, const float16_t *bias, int act_type,
                       size_t depth, size_t row, size_t col, size_t stride, bool write_nhwc);
+void MatmulFp16Neon64Opt(const float16_t *a, const float16_t *b, float16_t *c, const float16_t *bias, int act_type,
+                         size_t depth, size_t row, size_t col, size_t stride, int write_nhwc);
 void RowMajor2Col16MajorFp16(void *src, float16_t *dst, int row, int col, bool is_fp32_src);
 void RowMajor2Row16MajorFp16(void *src, float16_t *dst, int row, int col, bool is_fp32_src);
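
These packing helpers reorder a row-major matrix into 16-row tiles so the kernel can load 16 contiguous fp16 lanes per column step. Below is a minimal sketch of that idea; PackRowMajorToCol16 is a hypothetical stand-in, and the 16-row blocking with zero padding is an assumption about the layout, not taken from this commit.

#include <arm_neon.h>   /* float16_t on AArch64 */
#include <string.h>

/* Pack row-major src (row x col) into blocks of 16 rows, stored
 * column-by-column within each block; tail rows are zero-padded. */
static void PackRowMajorToCol16(const float16_t *src, float16_t *dst,
                                int row, int col) {
  int row16 = (row + 15) / 16 * 16;  /* round row count up to 16 */
  memset(dst, 0, (size_t)row16 * (size_t)col * sizeof(float16_t));
  for (int r = 0; r < row; ++r) {
    int block = r / 16;
    int lane = r % 16;
    for (int j = 0; j < col; ++j) {
      dst[(block * col + j) * 16 + lane] = src[r * col + j];
    }
  }
}

With this layout, column j of a 16-row block occupies 16 consecutive fp16 values, i.e. exactly one 128-bit NEON register load per column step.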