OpenCloudOS-Kernel/arch/arm64/crypto/sm4-ce-core.S

1094 lines
20 KiB
ArmAsm
Raw Normal View History

crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* SM4 Cipher Algorithm for ARMv8 with Crypto Extensions
* as specified in
* https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
*
* Copyright (C) 2022, Alibaba Group.
* Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
#include "sm4-ce-asm.h"
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
.arch armv8-a+crypto
.irp b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, \
20, 24, 25, 26, 27, 28, 29, 30, 31
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
.set .Lv\b\().4s, \b
.endr
.macro sm4e, vd, vn
.inst 0xcec08400 | (.L\vn << 5) | .L\vd
.endm
.macro sm4ekey, vd, vn, vm
.inst 0xce60c800 | (.L\vm << 16) | (.L\vn << 5) | .L\vd
.endm
/* Register macros */
#define RTMP0 v16
#define RTMP1 v17
#define RTMP2 v18
#define RTMP3 v19
#define RIV v20
#define RMAC v20
#define RMASK v21
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
.align 3
SYM_FUNC_START(sm4_ce_expand_key)
/* input:
* x0: 128-bit key
* x1: rkey_enc
* x2: rkey_dec
* x3: fk array
* x4: ck array
*/
ld1 {v0.16b}, [x0];
rev32 v0.16b, v0.16b;
ld1 {v1.16b}, [x3];
/* load ck */
ld1 {v24.16b-v27.16b}, [x4], #64;
ld1 {v28.16b-v31.16b}, [x4];
/* input ^ fk */
eor v0.16b, v0.16b, v1.16b;
sm4ekey v0.4s, v0.4s, v24.4s;
sm4ekey v1.4s, v0.4s, v25.4s;
sm4ekey v2.4s, v1.4s, v26.4s;
sm4ekey v3.4s, v2.4s, v27.4s;
sm4ekey v4.4s, v3.4s, v28.4s;
sm4ekey v5.4s, v4.4s, v29.4s;
sm4ekey v6.4s, v5.4s, v30.4s;
sm4ekey v7.4s, v6.4s, v31.4s;
adr_l x5, .Lbswap128_mask
ld1 {v24.16b}, [x5]
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
st1 {v0.16b-v3.16b}, [x1], #64;
st1 {v4.16b-v7.16b}, [x1];
tbl v16.16b, {v7.16b}, v24.16b
tbl v17.16b, {v6.16b}, v24.16b
tbl v18.16b, {v5.16b}, v24.16b
tbl v19.16b, {v4.16b}, v24.16b
tbl v20.16b, {v3.16b}, v24.16b
tbl v21.16b, {v2.16b}, v24.16b
tbl v22.16b, {v1.16b}, v24.16b
tbl v23.16b, {v0.16b}, v24.16b
st1 {v16.16b-v19.16b}, [x2], #64
st1 {v20.16b-v23.16b}, [x2]
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
ret;
SYM_FUNC_END(sm4_ce_expand_key)
.align 3
SYM_FUNC_START(sm4_ce_crypt_block)
/* input:
* x0: round key array, CTX
* x1: dst
* x2: src
*/
SM4_PREPARE(x0)
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
ld1 {v0.16b}, [x2];
SM4_CRYPT_BLK(v0);
st1 {v0.16b}, [x1];
ret;
SYM_FUNC_END(sm4_ce_crypt_block)
.align 3
SYM_FUNC_START(sm4_ce_crypt)
/* input:
* x0: round key array, CTX
* x1: dst
* x2: src
* w3: nblocks
*/
SM4_PREPARE(x0)
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
.Lcrypt_loop_blk:
sub w3, w3, #8;
tbnz w3, #31, .Lcrypt_tail8;
ld1 {v0.16b-v3.16b}, [x2], #64;
ld1 {v4.16b-v7.16b}, [x2], #64;
SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
st1 {v0.16b-v3.16b}, [x1], #64;
st1 {v4.16b-v7.16b}, [x1], #64;
cbz w3, .Lcrypt_end;
b .Lcrypt_loop_blk;
.Lcrypt_tail8:
add w3, w3, #8;
cmp w3, #4;
blt .Lcrypt_tail4;
sub w3, w3, #4;
ld1 {v0.16b-v3.16b}, [x2], #64;
SM4_CRYPT_BLK4(v0, v1, v2, v3);
st1 {v0.16b-v3.16b}, [x1], #64;
cbz w3, .Lcrypt_end;
.Lcrypt_tail4:
sub w3, w3, #1;
ld1 {v0.16b}, [x2], #16;
SM4_CRYPT_BLK(v0);
st1 {v0.16b}, [x1], #16;
cbnz w3, .Lcrypt_tail4;
.Lcrypt_end:
ret;
SYM_FUNC_END(sm4_ce_crypt)
.align 3
SYM_FUNC_START(sm4_ce_cbc_enc)
/* input:
* x0: round key array, CTX
* x1: dst
* x2: src
* x3: iv (big endian, 128 bit)
* w4: nblocks
*/
SM4_PREPARE(x0)
ld1 {RIV.16b}, [x3]
.Lcbc_enc_loop_4x:
cmp w4, #4
blt .Lcbc_enc_loop_1x
sub w4, w4, #4
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
ld1 {v0.16b-v3.16b}, [x2], #64
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
eor v0.16b, v0.16b, RIV.16b
SM4_CRYPT_BLK(v0)
eor v1.16b, v1.16b, v0.16b
SM4_CRYPT_BLK(v1)
eor v2.16b, v2.16b, v1.16b
SM4_CRYPT_BLK(v2)
eor v3.16b, v3.16b, v2.16b
SM4_CRYPT_BLK(v3)
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
st1 {v0.16b-v3.16b}, [x1], #64
mov RIV.16b, v3.16b
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
cbz w4, .Lcbc_enc_end
b .Lcbc_enc_loop_4x
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
.Lcbc_enc_loop_1x:
sub w4, w4, #1
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
ld1 {v0.16b}, [x2], #16
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
eor RIV.16b, RIV.16b, v0.16b
SM4_CRYPT_BLK(RIV)
st1 {RIV.16b}, [x1], #16
cbnz w4, .Lcbc_enc_loop_1x
.Lcbc_enc_end:
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
/* store new IV */
st1 {RIV.16b}, [x3]
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
ret
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
SYM_FUNC_END(sm4_ce_cbc_enc)
.align 3
SYM_FUNC_START(sm4_ce_cbc_dec)
/* input:
* x0: round key array, CTX
* x1: dst
* x2: src
* x3: iv (big endian, 128 bit)
* w4: nblocks
*/
SM4_PREPARE(x0)
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
ld1 {RIV.16b}, [x3]
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
.Lcbc_dec_loop_8x:
sub w4, w4, #8
tbnz w4, #31, .Lcbc_dec_4x
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
ld1 {v0.16b-v3.16b}, [x2], #64
ld1 {v4.16b-v7.16b}, [x2], #64
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
rev32 v8.16b, v0.16b
rev32 v9.16b, v1.16b
rev32 v10.16b, v2.16b
rev32 v11.16b, v3.16b
rev32 v12.16b, v4.16b
rev32 v13.16b, v5.16b
rev32 v14.16b, v6.16b
rev32 v15.16b, v7.16b
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
SM4_CRYPT_BLK8_BE(v8, v9, v10, v11, v12, v13, v14, v15)
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
eor v8.16b, v8.16b, RIV.16b
eor v9.16b, v9.16b, v0.16b
eor v10.16b, v10.16b, v1.16b
eor v11.16b, v11.16b, v2.16b
eor v12.16b, v12.16b, v3.16b
eor v13.16b, v13.16b, v4.16b
eor v14.16b, v14.16b, v5.16b
eor v15.16b, v15.16b, v6.16b
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
st1 {v8.16b-v11.16b}, [x1], #64
st1 {v12.16b-v15.16b}, [x1], #64
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
mov RIV.16b, v7.16b
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
cbz w4, .Lcbc_dec_end
b .Lcbc_dec_loop_8x
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
.Lcbc_dec_4x:
add w4, w4, #8
cmp w4, #4
blt .Lcbc_dec_loop_1x
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
sub w4, w4, #4
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
ld1 {v0.16b-v3.16b}, [x2], #64
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
rev32 v8.16b, v0.16b
rev32 v9.16b, v1.16b
rev32 v10.16b, v2.16b
rev32 v11.16b, v3.16b
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
SM4_CRYPT_BLK4_BE(v8, v9, v10, v11)
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
eor v8.16b, v8.16b, RIV.16b
eor v9.16b, v9.16b, v0.16b
eor v10.16b, v10.16b, v1.16b
eor v11.16b, v11.16b, v2.16b
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
st1 {v8.16b-v11.16b}, [x1], #64
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
mov RIV.16b, v3.16b
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
cbz w4, .Lcbc_dec_end
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
.Lcbc_dec_loop_1x:
sub w4, w4, #1
ld1 {v0.16b}, [x2], #16
rev32 v8.16b, v0.16b
SM4_CRYPT_BLK_BE(v8)
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
eor v8.16b, v8.16b, RIV.16b
st1 {v8.16b}, [x1], #16
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
mov RIV.16b, v0.16b
cbnz w4, .Lcbc_dec_loop_1x
.Lcbc_dec_end:
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
/* store new IV */
st1 {RIV.16b}, [x3]
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
ret
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
SYM_FUNC_END(sm4_ce_cbc_dec)
.align 3
SYM_FUNC_START(sm4_ce_cbc_cts_enc)
/* input:
* x0: round key array, CTX
* x1: dst
* x2: src
* x3: iv (big endian, 128 bit)
* w4: nbytes
*/
SM4_PREPARE(x0)
sub w5, w4, #16
uxtw x5, w5
ld1 {RIV.16b}, [x3]
ld1 {v0.16b}, [x2]
eor RIV.16b, RIV.16b, v0.16b
SM4_CRYPT_BLK(RIV)
/* load permute table */
adr_l x6, .Lcts_permute_table
add x7, x6, #32
add x6, x6, x5
sub x7, x7, x5
ld1 {v3.16b}, [x6]
ld1 {v4.16b}, [x7]
/* overlapping loads */
add x2, x2, x5
ld1 {v1.16b}, [x2]
/* create Cn from En-1 */
tbl v0.16b, {RIV.16b}, v3.16b
/* padding Pn with zeros */
tbl v1.16b, {v1.16b}, v4.16b
eor v1.16b, v1.16b, RIV.16b
SM4_CRYPT_BLK(v1)
/* overlapping stores */
add x5, x1, x5
st1 {v0.16b}, [x5]
st1 {v1.16b}, [x1]
ret
SYM_FUNC_END(sm4_ce_cbc_cts_enc)
.align 3
SYM_FUNC_START(sm4_ce_cbc_cts_dec)
/* input:
* x0: round key array, CTX
* x1: dst
* x2: src
* x3: iv (big endian, 128 bit)
* w4: nbytes
*/
SM4_PREPARE(x0)
sub w5, w4, #16
uxtw x5, w5
ld1 {RIV.16b}, [x3]
/* load permute table */
adr_l x6, .Lcts_permute_table
add x7, x6, #32
add x6, x6, x5
sub x7, x7, x5
ld1 {v3.16b}, [x6]
ld1 {v4.16b}, [x7]
/* overlapping loads */
ld1 {v0.16b}, [x2], x5
ld1 {v1.16b}, [x2]
SM4_CRYPT_BLK(v0)
/* select the first Ln bytes of Xn to create Pn */
tbl v2.16b, {v0.16b}, v3.16b
eor v2.16b, v2.16b, v1.16b
/* overwrite the first Ln bytes with Cn to create En-1 */
tbx v0.16b, {v1.16b}, v4.16b
SM4_CRYPT_BLK(v0)
eor v0.16b, v0.16b, RIV.16b
/* overlapping stores */
add x5, x1, x5
st1 {v2.16b}, [x5]
st1 {v0.16b}, [x1]
ret
SYM_FUNC_END(sm4_ce_cbc_cts_dec)
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
.align 3
SYM_FUNC_START(sm4_ce_cfb_enc)
/* input:
* x0: round key array, CTX
* x1: dst
* x2: src
* x3: iv (big endian, 128 bit)
* w4: nblocks
*/
SM4_PREPARE(x0)
ld1 {RIV.16b}, [x3]
.Lcfb_enc_loop_4x:
cmp w4, #4
blt .Lcfb_enc_loop_1x
sub w4, w4, #4
ld1 {v0.16b-v3.16b}, [x2], #64
rev32 v8.16b, RIV.16b
SM4_CRYPT_BLK_BE(v8)
eor v0.16b, v0.16b, v8.16b
rev32 v8.16b, v0.16b
SM4_CRYPT_BLK_BE(v8)
eor v1.16b, v1.16b, v8.16b
rev32 v8.16b, v1.16b
SM4_CRYPT_BLK_BE(v8)
eor v2.16b, v2.16b, v8.16b
rev32 v8.16b, v2.16b
SM4_CRYPT_BLK_BE(v8)
eor v3.16b, v3.16b, v8.16b
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
st1 {v0.16b-v3.16b}, [x1], #64
mov RIV.16b, v3.16b
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
cbz w4, .Lcfb_enc_end
b .Lcfb_enc_loop_4x
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
.Lcfb_enc_loop_1x:
sub w4, w4, #1
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
ld1 {v0.16b}, [x2], #16
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
SM4_CRYPT_BLK(RIV)
eor RIV.16b, RIV.16b, v0.16b
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
st1 {RIV.16b}, [x1], #16
cbnz w4, .Lcfb_enc_loop_1x
.Lcfb_enc_end:
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
/* store new IV */
st1 {RIV.16b}, [x3]
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
ret
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
SYM_FUNC_END(sm4_ce_cfb_enc)
.align 3
SYM_FUNC_START(sm4_ce_cfb_dec)
/* input:
* x0: round key array, CTX
* x1: dst
* x2: src
* x3: iv (big endian, 128 bit)
* w4: nblocks
*/
SM4_PREPARE(x0)
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
ld1 {RIV.16b}, [x3]
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
.Lcfb_dec_loop_8x:
sub w4, w4, #8
tbnz w4, #31, .Lcfb_dec_4x
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
ld1 {v0.16b-v3.16b}, [x2], #64
ld1 {v4.16b-v7.16b}, [x2], #64
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
rev32 v8.16b, RIV.16b
rev32 v9.16b, v0.16b
rev32 v10.16b, v1.16b
rev32 v11.16b, v2.16b
rev32 v12.16b, v3.16b
rev32 v13.16b, v4.16b
rev32 v14.16b, v5.16b
rev32 v15.16b, v6.16b
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
SM4_CRYPT_BLK8_BE(v8, v9, v10, v11, v12, v13, v14, v15)
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
mov RIV.16b, v7.16b
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
eor v0.16b, v0.16b, v8.16b
eor v1.16b, v1.16b, v9.16b
eor v2.16b, v2.16b, v10.16b
eor v3.16b, v3.16b, v11.16b
eor v4.16b, v4.16b, v12.16b
eor v5.16b, v5.16b, v13.16b
eor v6.16b, v6.16b, v14.16b
eor v7.16b, v7.16b, v15.16b
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
st1 {v0.16b-v3.16b}, [x1], #64
st1 {v4.16b-v7.16b}, [x1], #64
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
cbz w4, .Lcfb_dec_end
b .Lcfb_dec_loop_8x
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
.Lcfb_dec_4x:
add w4, w4, #8
cmp w4, #4
blt .Lcfb_dec_loop_1x
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
sub w4, w4, #4
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
ld1 {v0.16b-v3.16b}, [x2], #64
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
rev32 v8.16b, RIV.16b
rev32 v9.16b, v0.16b
rev32 v10.16b, v1.16b
rev32 v11.16b, v2.16b
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
SM4_CRYPT_BLK4_BE(v8, v9, v10, v11)
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
mov RIV.16b, v3.16b
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
eor v0.16b, v0.16b, v8.16b
eor v1.16b, v1.16b, v9.16b
eor v2.16b, v2.16b, v10.16b
eor v3.16b, v3.16b, v11.16b
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
st1 {v0.16b-v3.16b}, [x1], #64
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
cbz w4, .Lcfb_dec_end
.Lcfb_dec_loop_1x:
sub w4, w4, #1
ld1 {v0.16b}, [x2], #16
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
SM4_CRYPT_BLK(RIV)
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
eor RIV.16b, RIV.16b, v0.16b
st1 {RIV.16b}, [x1], #16
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
mov RIV.16b, v0.16b
cbnz w4, .Lcfb_dec_loop_1x
.Lcfb_dec_end:
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
/* store new IV */
st1 {RIV.16b}, [x3]
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
ret
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
SYM_FUNC_END(sm4_ce_cfb_dec)
.align 3
SYM_FUNC_START(sm4_ce_ctr_enc)
/* input:
* x0: round key array, CTX
* x1: dst
* x2: src
* x3: ctr (big endian, 128 bit)
* w4: nblocks
*/
SM4_PREPARE(x0)
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
ldp x7, x8, [x3]
rev x7, x7
rev x8, x8
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
.Lctr_loop_8x:
sub w4, w4, #8
tbnz w4, #31, .Lctr_4x
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
#define inc_le128(vctr) \
mov vctr.d[1], x8; \
mov vctr.d[0], x7; \
adds x8, x8, #1; \
rev64 vctr.16b, vctr.16b; \
adc x7, x7, xzr;
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
/* construct CTRs */
inc_le128(v0) /* +0 */
inc_le128(v1) /* +1 */
inc_le128(v2) /* +2 */
inc_le128(v3) /* +3 */
inc_le128(v4) /* +4 */
inc_le128(v5) /* +5 */
inc_le128(v6) /* +6 */
inc_le128(v7) /* +7 */
ld1 {v8.16b-v11.16b}, [x2], #64
ld1 {v12.16b-v15.16b}, [x2], #64
SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7)
eor v0.16b, v0.16b, v8.16b
eor v1.16b, v1.16b, v9.16b
eor v2.16b, v2.16b, v10.16b
eor v3.16b, v3.16b, v11.16b
eor v4.16b, v4.16b, v12.16b
eor v5.16b, v5.16b, v13.16b
eor v6.16b, v6.16b, v14.16b
eor v7.16b, v7.16b, v15.16b
st1 {v0.16b-v3.16b}, [x1], #64
st1 {v4.16b-v7.16b}, [x1], #64
cbz w4, .Lctr_end
b .Lctr_loop_8x
.Lctr_4x:
add w4, w4, #8
cmp w4, #4
blt .Lctr_loop_1x
sub w4, w4, #4
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
/* construct CTRs */
inc_le128(v0) /* +0 */
inc_le128(v1) /* +1 */
inc_le128(v2) /* +2 */
inc_le128(v3) /* +3 */
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
ld1 {v8.16b-v11.16b}, [x2], #64
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
SM4_CRYPT_BLK4(v0, v1, v2, v3)
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
eor v0.16b, v0.16b, v8.16b
eor v1.16b, v1.16b, v9.16b
eor v2.16b, v2.16b, v10.16b
eor v3.16b, v3.16b, v11.16b
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
st1 {v0.16b-v3.16b}, [x1], #64
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
cbz w4, .Lctr_end
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
.Lctr_loop_1x:
sub w4, w4, #1
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
/* construct CTRs */
inc_le128(v0)
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
ld1 {v8.16b}, [x2], #16
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
SM4_CRYPT_BLK(v0)
eor v0.16b, v0.16b, v8.16b
st1 {v0.16b}, [x1], #16
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
cbnz w4, .Lctr_loop_1x
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
.Lctr_end:
/* store new CTR */
rev x7, x7
rev x8, x8
stp x7, x8, [x3]
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
ret
crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-03-15 17:44:54 +08:00
SYM_FUNC_END(sm4_ce_ctr_enc)
#define tweak_next(vt, vin, RTMP) \
sshr RTMP.2d, vin.2d, #63; \
and RTMP.16b, RTMP.16b, RMASK.16b; \
add vt.2d, vin.2d, vin.2d; \
ext RTMP.16b, RTMP.16b, RTMP.16b, #8; \
eor vt.16b, vt.16b, RTMP.16b;
.align 3
SYM_FUNC_START(sm4_ce_xts_enc)
/* input:
* x0: round key array, CTX
* x1: dst
* x2: src
* x3: tweak (big endian, 128 bit)
* w4: nbytes
* x5: round key array for IV
*/
ld1 {v8.16b}, [x3]
cbz x5, .Lxts_enc_nofirst
SM4_PREPARE(x5)
/* Generate first tweak */
SM4_CRYPT_BLK(v8)
.Lxts_enc_nofirst:
SM4_PREPARE(x0)
ands w5, w4, #15
lsr w4, w4, #4
sub w6, w4, #1
csel w4, w4, w6, eq
uxtw x5, w5
movi RMASK.2s, #0x1
movi RTMP0.2s, #0x87
uzp1 RMASK.4s, RMASK.4s, RTMP0.4s
cbz w4, .Lxts_enc_cts
.Lxts_enc_loop_8x:
sub w4, w4, #8
tbnz w4, #31, .Lxts_enc_4x
tweak_next( v9, v8, RTMP0)
tweak_next(v10, v9, RTMP1)
tweak_next(v11, v10, RTMP2)
tweak_next(v12, v11, RTMP3)
tweak_next(v13, v12, RTMP0)
tweak_next(v14, v13, RTMP1)
tweak_next(v15, v14, RTMP2)
ld1 {v0.16b-v3.16b}, [x2], #64
ld1 {v4.16b-v7.16b}, [x2], #64
eor v0.16b, v0.16b, v8.16b
eor v1.16b, v1.16b, v9.16b
eor v2.16b, v2.16b, v10.16b
eor v3.16b, v3.16b, v11.16b
eor v4.16b, v4.16b, v12.16b
eor v5.16b, v5.16b, v13.16b
eor v6.16b, v6.16b, v14.16b
eor v7.16b, v7.16b, v15.16b
SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7)
eor v0.16b, v0.16b, v8.16b
eor v1.16b, v1.16b, v9.16b
eor v2.16b, v2.16b, v10.16b
eor v3.16b, v3.16b, v11.16b
eor v4.16b, v4.16b, v12.16b
eor v5.16b, v5.16b, v13.16b
eor v6.16b, v6.16b, v14.16b
eor v7.16b, v7.16b, v15.16b
st1 {v0.16b-v3.16b}, [x1], #64
st1 {v4.16b-v7.16b}, [x1], #64
tweak_next(v8, v15, RTMP3)
cbz w4, .Lxts_enc_cts
b .Lxts_enc_loop_8x
.Lxts_enc_4x:
add w4, w4, #8
cmp w4, #4
blt .Lxts_enc_loop_1x
sub w4, w4, #4
tweak_next( v9, v8, RTMP0)
tweak_next(v10, v9, RTMP1)
tweak_next(v11, v10, RTMP2)
ld1 {v0.16b-v3.16b}, [x2], #64
eor v0.16b, v0.16b, v8.16b
eor v1.16b, v1.16b, v9.16b
eor v2.16b, v2.16b, v10.16b
eor v3.16b, v3.16b, v11.16b
SM4_CRYPT_BLK4(v0, v1, v2, v3)
eor v0.16b, v0.16b, v8.16b
eor v1.16b, v1.16b, v9.16b
eor v2.16b, v2.16b, v10.16b
eor v3.16b, v3.16b, v11.16b
st1 {v0.16b-v3.16b}, [x1], #64
tweak_next(v8, v11, RTMP3)
cbz w4, .Lxts_enc_cts
.Lxts_enc_loop_1x:
sub w4, w4, #1
ld1 {v0.16b}, [x2], #16
eor v0.16b, v0.16b, v8.16b
SM4_CRYPT_BLK(v0)
eor v0.16b, v0.16b, v8.16b
st1 {v0.16b}, [x1], #16
tweak_next(v8, v8, RTMP0)
cbnz w4, .Lxts_enc_loop_1x
.Lxts_enc_cts:
cbz x5, .Lxts_enc_end
/* cipher text stealing */
tweak_next(v9, v8, RTMP0)
ld1 {v0.16b}, [x2]
eor v0.16b, v0.16b, v8.16b
SM4_CRYPT_BLK(v0)
eor v0.16b, v0.16b, v8.16b
/* load permute table */
adr_l x6, .Lcts_permute_table
add x7, x6, #32
add x6, x6, x5
sub x7, x7, x5
ld1 {v3.16b}, [x6]
ld1 {v4.16b}, [x7]
/* overlapping loads */
add x2, x2, x5
ld1 {v1.16b}, [x2]
/* create Cn from En-1 */
tbl v2.16b, {v0.16b}, v3.16b
/* padding Pn with En-1 at the end */
tbx v0.16b, {v1.16b}, v4.16b
eor v0.16b, v0.16b, v9.16b
SM4_CRYPT_BLK(v0)
eor v0.16b, v0.16b, v9.16b
/* overlapping stores */
add x5, x1, x5
st1 {v2.16b}, [x5]
st1 {v0.16b}, [x1]
b .Lxts_enc_ret
.Lxts_enc_end:
/* store new tweak */
st1 {v8.16b}, [x3]
.Lxts_enc_ret:
ret
SYM_FUNC_END(sm4_ce_xts_enc)
.align 3
SYM_FUNC_START(sm4_ce_xts_dec)
/* input:
* x0: round key array, CTX
* x1: dst
* x2: src
* x3: tweak (big endian, 128 bit)
* w4: nbytes
* x5: round key array for IV
*/
ld1 {v8.16b}, [x3]
cbz x5, .Lxts_dec_nofirst
SM4_PREPARE(x5)
/* Generate first tweak */
SM4_CRYPT_BLK(v8)
.Lxts_dec_nofirst:
SM4_PREPARE(x0)
ands w5, w4, #15
lsr w4, w4, #4
sub w6, w4, #1
csel w4, w4, w6, eq
uxtw x5, w5
movi RMASK.2s, #0x1
movi RTMP0.2s, #0x87
uzp1 RMASK.4s, RMASK.4s, RTMP0.4s
cbz w4, .Lxts_dec_cts
.Lxts_dec_loop_8x:
sub w4, w4, #8
tbnz w4, #31, .Lxts_dec_4x
tweak_next( v9, v8, RTMP0)
tweak_next(v10, v9, RTMP1)
tweak_next(v11, v10, RTMP2)
tweak_next(v12, v11, RTMP3)
tweak_next(v13, v12, RTMP0)
tweak_next(v14, v13, RTMP1)
tweak_next(v15, v14, RTMP2)
ld1 {v0.16b-v3.16b}, [x2], #64
ld1 {v4.16b-v7.16b}, [x2], #64
eor v0.16b, v0.16b, v8.16b
eor v1.16b, v1.16b, v9.16b
eor v2.16b, v2.16b, v10.16b
eor v3.16b, v3.16b, v11.16b
eor v4.16b, v4.16b, v12.16b
eor v5.16b, v5.16b, v13.16b
eor v6.16b, v6.16b, v14.16b
eor v7.16b, v7.16b, v15.16b
SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7)
eor v0.16b, v0.16b, v8.16b
eor v1.16b, v1.16b, v9.16b
eor v2.16b, v2.16b, v10.16b
eor v3.16b, v3.16b, v11.16b
eor v4.16b, v4.16b, v12.16b
eor v5.16b, v5.16b, v13.16b
eor v6.16b, v6.16b, v14.16b
eor v7.16b, v7.16b, v15.16b
st1 {v0.16b-v3.16b}, [x1], #64
st1 {v4.16b-v7.16b}, [x1], #64
tweak_next(v8, v15, RTMP3)
cbz w4, .Lxts_dec_cts
b .Lxts_dec_loop_8x
.Lxts_dec_4x:
add w4, w4, #8
cmp w4, #4
blt .Lxts_dec_loop_1x
sub w4, w4, #4
tweak_next( v9, v8, RTMP0)
tweak_next(v10, v9, RTMP1)
tweak_next(v11, v10, RTMP2)
ld1 {v0.16b-v3.16b}, [x2], #64
eor v0.16b, v0.16b, v8.16b
eor v1.16b, v1.16b, v9.16b
eor v2.16b, v2.16b, v10.16b
eor v3.16b, v3.16b, v11.16b
SM4_CRYPT_BLK4(v0, v1, v2, v3)
eor v0.16b, v0.16b, v8.16b
eor v1.16b, v1.16b, v9.16b
eor v2.16b, v2.16b, v10.16b
eor v3.16b, v3.16b, v11.16b
st1 {v0.16b-v3.16b}, [x1], #64
tweak_next(v8, v11, RTMP3)
cbz w4, .Lxts_dec_cts
.Lxts_dec_loop_1x:
sub w4, w4, #1
ld1 {v0.16b}, [x2], #16
eor v0.16b, v0.16b, v8.16b
SM4_CRYPT_BLK(v0)
eor v0.16b, v0.16b, v8.16b
st1 {v0.16b}, [x1], #16
tweak_next(v8, v8, RTMP0)
cbnz w4, .Lxts_dec_loop_1x
.Lxts_dec_cts:
cbz x5, .Lxts_dec_end
/* cipher text stealing */
tweak_next(v9, v8, RTMP0)
ld1 {v0.16b}, [x2]
eor v0.16b, v0.16b, v9.16b
SM4_CRYPT_BLK(v0)
eor v0.16b, v0.16b, v9.16b
/* load permute table */
adr_l x6, .Lcts_permute_table
add x7, x6, #32
add x6, x6, x5
sub x7, x7, x5
ld1 {v3.16b}, [x6]
ld1 {v4.16b}, [x7]
/* overlapping loads */
add x2, x2, x5
ld1 {v1.16b}, [x2]
/* create Cn from En-1 */
tbl v2.16b, {v0.16b}, v3.16b
/* padding Pn with En-1 at the end */
tbx v0.16b, {v1.16b}, v4.16b
eor v0.16b, v0.16b, v8.16b
SM4_CRYPT_BLK(v0)
eor v0.16b, v0.16b, v8.16b
/* overlapping stores */
add x5, x1, x5
st1 {v2.16b}, [x5]
st1 {v0.16b}, [x1]
b .Lxts_dec_ret
.Lxts_dec_end:
/* store new tweak */
st1 {v8.16b}, [x3]
.Lxts_dec_ret:
ret
SYM_FUNC_END(sm4_ce_xts_dec)
.align 3
SYM_FUNC_START(sm4_ce_mac_update)
/* input:
* x0: round key array, CTX
* x1: digest
* x2: src
* w3: nblocks
* w4: enc_before
* w5: enc_after
*/
SM4_PREPARE(x0)
ld1 {RMAC.16b}, [x1]
cbz w4, .Lmac_update
SM4_CRYPT_BLK(RMAC)
.Lmac_update:
cbz w3, .Lmac_ret
sub w6, w3, #1
cmp w5, wzr
csel w3, w3, w6, ne
cbz w3, .Lmac_end
.Lmac_loop_4x:
cmp w3, #4
blt .Lmac_loop_1x
sub w3, w3, #4
ld1 {v0.16b-v3.16b}, [x2], #64
eor RMAC.16b, RMAC.16b, v0.16b
SM4_CRYPT_BLK(RMAC)
eor RMAC.16b, RMAC.16b, v1.16b
SM4_CRYPT_BLK(RMAC)
eor RMAC.16b, RMAC.16b, v2.16b
SM4_CRYPT_BLK(RMAC)
eor RMAC.16b, RMAC.16b, v3.16b
SM4_CRYPT_BLK(RMAC)
cbz w3, .Lmac_end
b .Lmac_loop_4x
.Lmac_loop_1x:
sub w3, w3, #1
ld1 {v0.16b}, [x2], #16
eor RMAC.16b, RMAC.16b, v0.16b
SM4_CRYPT_BLK(RMAC)
cbnz w3, .Lmac_loop_1x
.Lmac_end:
cbnz w5, .Lmac_ret
ld1 {v0.16b}, [x2], #16
eor RMAC.16b, RMAC.16b, v0.16b
.Lmac_ret:
st1 {RMAC.16b}, [x1]
ret
SYM_FUNC_END(sm4_ce_mac_update)
.section ".rodata", "a"
.align 4
.Lbswap128_mask:
.byte 0x0c, 0x0d, 0x0e, 0x0f, 0x08, 0x09, 0x0a, 0x0b
.byte 0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03
.Lcts_permute_table:
.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
.byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
.byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff