Delete some comments

This commit is contained in:
zhanyuan 2020-08-31 11:17:05 +08:00
parent 7b0370f79e
commit ea4341852f
6 changed files with 0 additions and 165 deletions

View File

@ -6,46 +6,6 @@
.type MatmulFloatNeon64, %function
#endif
// A: LM [row_8 * depth] col_8_major
// B: RM [depth * col_8] row_8_major
// C: A*B [row_8 * col_8] col_8x8_major
// A * B -> [8 * depth] * [depth * 8] -> [8 * 4] * [4 * 8] or [8 * 1] * [1 * 8]
///////////////////////////////////////////////////////////////////////////////
//CommLoopMul RM 1x8 block
// /-----------------------------------------\
// |v2.s[0] ... v2.s[3] v3.s[0] ... v3.s[3]|
// \-----------------------------------------/
// LM 8x1 block
// /---------------------\ /-----------------------------------------\
// | v0.s[0] | |v16.s[0]...v16.s[3] v17.s[0]...v17.s[3]|
// | ... | | ... ... |
// | v0.s[3] | |v22.s[0]...v22.s[3] v23.s[0]...v23.s[3]|
// | v1.s[0] | |v24.s[0]...v24.s[3] v25.s[0]...v25.s[3]|
// | ... | | ... ... |
// | v1.s[3] | |v30.s[0]...v30.s[3] v31.s[0]...v31.s[3]|
// \---------------------/ \-----------------------------------------/
// accumulators 8x8 block
//
///////////////////////////////////////////////////////////////////////////////
//OptLoopMul4 RM 4x8 block
// /--------------------------------------------\
// |v8.s[0] ... v8.s[3] v9.s[0] ... v9.s[3] |
// |v10.s[0] ... v10.s[3] v11.s[0] ... v11.s[3]|
// |v12.s[0] ... v12.s[3] v13.s[0] ... v13.s[3]|
// |v14.s[0] ... v14.s[3] v15.s[0] ... v15.s[3]|
// \--------------------------------------------/
// LM 8x4 block
// /---------------------------------\ /--------------------------------------------\
// | v0.s[0] v2.s[0] v4.s[0] v6.s[0] | |v16.s[0]...v16.s[3] v17.s[0]...v17.s[3] |
// | ... ... ... ... | | ... ... |
// | v0.s[3] v2.s[3] v4.s[3] v6.s[3] | |v22.s[0]...v22.s[3] v23.s[0]...v23.s[3] |
// | v1.s[0] v3.s[0] v5.s[0] v7.s[0] | |v24.s[0]...v24.s[3] v25.s[0]...v25.s[3] |
// | ... ... ... ... | | ... ... |
// | v1.s[3] v3.s[3] v5.s[3] v7.s[3] | |v30.s[0]...v30.s[3] v31.s[0]...v31.s[3] |
// \---------------------------------/ \--------------------------------------------/
// accumulators 8x8 block
/////////////////////////////////////////////////////////////////////////////////
//
// void MatmulFloatNeon64(const float *a, const float *b, float *c, const float *bias, int act_type, int depth,
// int row, int col, int stride, bool write_nhwc)
// x0: a

View File

@ -6,46 +6,6 @@
.type MatmulFloatNeon64Opt, %function
#endif
// A: LM [row_8 * depth] col_8_major
// B: RM [depth * col_8] row_8_major
// C: A*B [row_8 * col_8] col_8x8_major
// A * B -> [8 * depth] * [depth * 8] -> [8 * 4] * [4 * 8] or [8 * 1] * [1 * 8]
///////////////////////////////////////////////////////////////////////////////
//CommLoopMul RM 1x8 block
// /-----------------------------------------\
// |v2.s[0] ... v2.s[3] v3.s[0] ... v3.s[3]|
// \-----------------------------------------/
// LM 8x1 block
// /---------------------\ /-----------------------------------------\
// | v0.s[0] | |v16.s[0]...v16.s[3] v17.s[0]...v17.s[3]|
// | ... | | ... ... |
// | v0.s[3] | |v22.s[0]...v22.s[3] v23.s[0]...v23.s[3]|
// | v1.s[0] | |v24.s[0]...v24.s[3] v25.s[0]...v25.s[3]|
// | ... | | ... ... |
// | v1.s[3] | |v30.s[0]...v30.s[3] v31.s[0]...v31.s[3]|
// \---------------------/ \-----------------------------------------/
// accumulators 8x8 block
//
///////////////////////////////////////////////////////////////////////////////
//OptLoopMul4 RM 4x8 block
// /--------------------------------------------\
// |v8.s[0] ... v8.s[3] v9.s[0] ... v9.s[3] |
// |v10.s[0] ... v10.s[3] v11.s[0] ... v11.s[3]|
// |v12.s[0] ... v12.s[3] v13.s[0] ... v13.s[3]|
// |v14.s[0] ... v14.s[3] v15.s[0] ... v15.s[3]|
// \--------------------------------------------/
// LM 8x4 block
// /---------------------------------\ /--------------------------------------------\
// | v0.s[0] v2.s[0] v4.s[0] v6.s[0] | |v16.s[0]...v16.s[3] v17.s[0]...v17.s[3] |
// | ... ... ... ... | | ... ... |
// | v0.s[3] v2.s[3] v4.s[3] v6.s[3] | |v22.s[0]...v22.s[3] v23.s[0]...v23.s[3] |
// | v1.s[0] v3.s[0] v5.s[0] v7.s[0] | |v24.s[0]...v24.s[3] v25.s[0]...v25.s[3] |
// | ... ... ... ... | | ... ... |
// | v1.s[3] v3.s[3] v5.s[3] v7.s[3] | |v30.s[0]...v30.s[3] v31.s[0]...v31.s[3] |
// \---------------------------------/ \--------------------------------------------/
// accumulators 8x8 block
/////////////////////////////////////////////////////////////////////////////////
//
// void MatmulFloatNeon64Opt(const float *a, const float *b, float *c, const float *bias, int act_type, int depth,
// int row, int col, size_t stride, size_t writeNhwc, size_t WriteWino)
// x0: a

View File

@ -6,22 +6,6 @@
.type MatmulInt8Neon64, %function
#endif
//
// int8 RM 16x4 block
// /-----------------------------------------|
// |v4.b[0] v5.b[0] v6.b[0] v7.b[0] |
// | ... ... ... ... |
// |v4.b[15] v5.b[15] v6.b[15] v7.b[15] |
// \-----------------------------------------/
// int8 LM 4x16 block
// /---------------------\ /-----------------------------------------|
// |v0.b[0] ... v0.b[15] | |v16.4s v17.4s v18.4s v19.4s |
// |v1.b[0] ... v1.b[15] | |v20.4s v21.4s v22.4s v23.4s |
// |v2.b[0] ... v2.b[15] | |v24.4s v25.4s v26.4s v27.4s |
// |v3.b[0] ... v3.b[15] | |v28.4s v29.4s v30.4s v31.4s |
// \---------------------/ \-----------------------------------------/
// int32 accumulators 4x4 block
//void MatmulInt8Neon64(const int8_t *a, const int8_t *b, int8_t *dst, int row4, int col4, int deep16,
// const int *a_sums, const int *bias, int act_min, int act_max, int out_zp,
// int multiplier, int left_shift, int right_shift, int row, int col, int stride);

View File

@ -6,22 +6,6 @@
.type MatMulR4Int8Neon64, %function
#endif
//
// int8 RM 16x4 block
// /-----------------------------------------|
// |v4.b[0] v5.b[0] v6.b[0] v7.b[0] |
// | ... ... ... ... |
// |v4.b[15] v5.b[15] v6.b[15] v7.b[15] |
// \-----------------------------------------/
// int8 LM 4x16 block
// /---------------------\ /-----------------------------------------|
// |v0.b[0] ... v0.b[15] | |v16.4s v17.4s v18.4s v19.4s |
// |v1.b[0] ... v1.b[15] | |v20.4s v21.4s v22.4s v23.4s |
// |v2.b[0] ... v2.b[15] | |v24.4s v25.4s v26.4s v27.4s |
// |v3.b[0] ... v3.b[15] | |v28.4s v29.4s v30.4s v31.4s |
// \---------------------/ \-----------------------------------------/
// int32 accumulators 4x4 block
//void MatMulR4Int8Neon64(const int8_t *a, const int8_t *b, int32_t *dst, int row4, int col4, int deep16,
// const int *input_sum, const int *bias)

View File

@ -6,43 +6,6 @@
.type MatmulInt8DpNeon64, %function
#endif
//
// int8 RHS 4x8 block
// /-----------------------------------------|
// |v2.b[0] ... v2.b[12] v3.b[0] ... v3.b[12]|
// | ... ... |
// |v2.b[3] ... v2.b[15] v3.b[3] ... v3.b[15]|
// \-----------------------------------------/
// int8 LHS 8x4 block
// /---------------------\ /-------------------------------------------|
// |v0.b[0] ... v0.b[3] | |v16.s[0] ... v16.s[3] v17.s[0] ... v17.s[3]|
// |v0.b[4] ... v0.b[7] | |v18.s[0] ... v18.s[3] v19.s[0] ... v19.s[3]|
// |v0.b[8] ... v0.b[11]| |v20.s[0] ... v20.s[3] v21.s[0] ... v21.s[3]|
// |v0.b[12] ... v0.b[15]| |v22.s[0] ... v22.s[3] v23.s[0] ... v23.s[3]|
// |v1.b[0] ... v1.b[3] | |v24.s[0] ... v24.s[3] v25.s[0] ... v25.s[3]|
// |v1.b[4] ... v1.b[7] | |v26.s[0] ... v26.s[3] v27.s[0] ... v27.s[3]|
// |v1.b[8] ... v1.b[11]| |v28.s[0] ... v28.s[3] v29.s[0] ... v29.s[3]|
// |v1.b[12] ... v1.b[15]| |v30.s[0] ... v30.s[3] v31.s[0] ... v31.s[3]|
// \---------------------/ \-------------------------------------------/
// int32 accumulators 8x8 block
// int8 RHS 16x8 block
// /-------------|
// |v2 v3 |
// |v6 v7 |
// |v10 v11 |
// |v14 v15 |
// \-------------/
// int8 LHS 8x16 block
// /--------------------\ /-------------|
// |v0 v4 v8 v12| | |
// |v1 v5 v9 v13| | |
// \--------------------/ \-------------/
//void MatmulInt8DpNeon64(const int8_t *a, const int8_t *b, int8_t *dst, int row8, int col8, int deep4,
// const int *a_sums, const int *bias, int act_min, int act_max, int out_zp,
// int multiplier, int left_shift, int right_shift, int row, int col, int stride);

View File

@ -6,22 +6,6 @@
.type MatMulOptR4Int8Neon64, %function
#endif
//
// int8 RM 16x4 block
// /-----------------------------------------|
// |v4.b[0] v5.b[0] v6.b[0] v7.b[0] |
// | ... ... ... ... |
// |v4.b[15] v5.b[15] v6.b[15] v7.b[15] |
// \-----------------------------------------/
// int8 LM 4x16 block
// /---------------------\ /-----------------------------------------|
// |v0.b[0] ... v0.b[15] | |v16.4s v17.4s v18.4s v19.4s |
// |v1.b[0] ... v1.b[15] | |v20.4s v21.4s v22.4s v23.4s |
// |v2.b[0] ... v2.b[15] | |v24.4s v25.4s v26.4s v27.4s |
// |v3.b[0] ... v3.b[15] | |v28.4s v29.4s v30.4s v31.4s |
// \---------------------/ \-----------------------------------------/
// int32 accumulators 4x4 block
//void MatMulOptR4Int8Neon64(const int8_t *a, const int8_t *b, int *dst, int row4, int col4, int deep16,
// const int *input_sum, const int *bias)