forked from mindspore-Ecosystem/mindspore
!5078 fix int8 sdot kernel bugs
Merge pull request !5078 from lixian/master
This commit is contained in:
commit
e53a135687
|
@@ -36,7 +36,7 @@ IndirectGemmInt8_24x4_dp:
|
||||||
ld1 {v17.4s}, [x22], x23
|
ld1 {v17.4s}, [x22], x23
|
||||||
ld1 {v18.4s}, [x22], x23
|
ld1 {v18.4s}, [x22], x23
|
||||||
ld1 {v19.4s}, [x22], x23
|
ld1 {v19.4s}, [x22], x23
|
||||||
ld1{v20.4s}, [x22], x23
|
ld1 {v20.4s}, [x22], x23
|
||||||
ld1 {v21.4s}, [x22], x23
|
ld1 {v21.4s}, [x22], x23
|
||||||
ld1 {v22.4s}, [x22], x23
|
ld1 {v22.4s}, [x22], x23
|
||||||
ld1 {v23.4s}, [x22], x23
|
ld1 {v23.4s}, [x22], x23
|
||||||
|
@@ -404,7 +404,7 @@ IndirectGemmInt8_24x4_dp:
|
||||||
sshr v0.4s, v0.4s, #31
|
sshr v0.4s, v0.4s, #31
|
||||||
sqadd v8.4s, v8.4s, v0.4s
|
sqadd v8.4s, v8.4s, v0.4s
|
||||||
srshl v8.4s, v8.4s, v4.4s
|
srshl v8.4s, v8.4s, v4.4s
|
||||||
and v0.16b, v4.16b, v9.16b
|
and v1.16b, v4.16b, v9.16b
|
||||||
sshr v1.4s, v1.4s, #31
|
sshr v1.4s, v1.4s, #31
|
||||||
sqadd v9.4s, v9.4s, v1.4s
|
sqadd v9.4s, v9.4s, v1.4s
|
||||||
srshl v9.4s, v9.4s, v4.4s
|
srshl v9.4s, v9.4s, v4.4s
|
||||||
|
@@ -420,7 +420,7 @@ IndirectGemmInt8_24x4_dp:
|
||||||
sshr v0.4s, v0.4s, #31
|
sshr v0.4s, v0.4s, #31
|
||||||
sqadd v12.4s, v12.4s, v0.4s
|
sqadd v12.4s, v12.4s, v0.4s
|
||||||
srshl v12.4s, v12.4s, v4.4s
|
srshl v12.4s, v12.4s, v4.4s
|
||||||
and v0.16b, v4.16b, v13.16b
|
and v1.16b, v4.16b, v13.16b
|
||||||
sshr v1.4s, v1.4s, #31
|
sshr v1.4s, v1.4s, #31
|
||||||
sqadd v13.4s, v13.4s, v1.4s
|
sqadd v13.4s, v13.4s, v1.4s
|
||||||
srshl v13.4s, v13.4s, v4.4s
|
srshl v13.4s, v13.4s, v4.4s
|
||||||
|
@@ -436,7 +436,7 @@ IndirectGemmInt8_24x4_dp:
|
||||||
sshr v0.4s, v0.4s, #31
|
sshr v0.4s, v0.4s, #31
|
||||||
sqadd v16.4s, v16.4s, v0.4s
|
sqadd v16.4s, v16.4s, v0.4s
|
||||||
srshl v16.4s, v16.4s, v4.4s
|
srshl v16.4s, v16.4s, v4.4s
|
||||||
and v0.16b, v4.16b, v17.16b
|
and v1.16b, v4.16b, v17.16b
|
||||||
sshr v1.4s, v1.4s, #31
|
sshr v1.4s, v1.4s, #31
|
||||||
sqadd v17.4s, v17.4s, v1.4s
|
sqadd v17.4s, v17.4s, v1.4s
|
||||||
srshl v17.4s, v17.4s, v4.4s
|
srshl v17.4s, v17.4s, v4.4s
|
||||||
|
@@ -452,7 +452,7 @@ IndirectGemmInt8_24x4_dp:
|
||||||
sshr v0.4s, v0.4s, #31
|
sshr v0.4s, v0.4s, #31
|
||||||
sqadd v20.4s, v20.4s, v0.4s
|
sqadd v20.4s, v20.4s, v0.4s
|
||||||
srshl v20.4s, v20.4s, v4.4s
|
srshl v20.4s, v20.4s, v4.4s
|
||||||
and v0.16b, v4.16b, v21.16b
|
and v1.16b, v4.16b, v21.16b
|
||||||
sshr v1.4s, v1.4s, #31
|
sshr v1.4s, v1.4s, #31
|
||||||
sqadd v21.4s, v21.4s, v1.4s
|
sqadd v21.4s, v21.4s, v1.4s
|
||||||
srshl v21.4s, v21.4s, v4.4s
|
srshl v21.4s, v21.4s, v4.4s
|
||||||
|
@@ -468,7 +468,7 @@ IndirectGemmInt8_24x4_dp:
|
||||||
sshr v0.4s, v0.4s, #31
|
sshr v0.4s, v0.4s, #31
|
||||||
sqadd v24.4s, v24.4s, v0.4s
|
sqadd v24.4s, v24.4s, v0.4s
|
||||||
srshl v24.4s, v24.4s, v4.4s
|
srshl v24.4s, v24.4s, v4.4s
|
||||||
and v0.16b, v4.16b, v25.16b
|
and v1.16b, v4.16b, v25.16b
|
||||||
sshr v1.4s, v1.4s, #31
|
sshr v1.4s, v1.4s, #31
|
||||||
sqadd v25.4s, v25.4s, v1.4s
|
sqadd v25.4s, v25.4s, v1.4s
|
||||||
srshl v25.4s, v25.4s, v4.4s
|
srshl v25.4s, v25.4s, v4.4s
|
||||||
|
@@ -484,7 +484,7 @@ IndirectGemmInt8_24x4_dp:
|
||||||
sshr v0.4s, v0.4s, #31
|
sshr v0.4s, v0.4s, #31
|
||||||
sqadd v28.4s, v28.4s, v0.4s
|
sqadd v28.4s, v28.4s, v0.4s
|
||||||
srshl v28.4s, v28.4s, v4.4s
|
srshl v28.4s, v28.4s, v4.4s
|
||||||
and v0.16b, v4.16b, v29.16b
|
and v1.16b, v4.16b, v29.16b
|
||||||
sshr v1.4s, v1.4s, #31
|
sshr v1.4s, v1.4s, #31
|
||||||
sqadd v29.4s, v29.4s, v1.4s
|
sqadd v29.4s, v29.4s, v1.4s
|
||||||
srshl v29.4s, v29.4s, v4.4s
|
srshl v29.4s, v29.4s, v4.4s
|
||||||
|
|
Loading…
Reference in New Issue