2007-10-24 04:37:23 +08:00
|
|
|
#
|
|
|
|
# Arch-specific CryptoAPI modules.
|
|
|
|
#
|
|
|
|
|
2013-03-24 21:34:07 +08:00
|
|
|
avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no)
|
2013-09-03 21:49:47 +08:00
|
|
|
avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
|
|
|
|
$(comma)4)$(comma)%ymm2,yes,no)
|
2013-03-24 21:34:07 +08:00
|
|
|
|
2012-06-18 19:07:19 +08:00
|
|
|
obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
|
2012-06-18 19:06:58 +08:00
|
|
|
|
2007-10-24 04:37:23 +08:00
|
|
|
obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
|
|
|
|
obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
|
2007-12-10 15:52:56 +08:00
|
|
|
obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
|
2011-11-09 22:26:31 +08:00
|
|
|
obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o
|
2007-10-24 04:37:23 +08:00
|
|
|
|
|
|
|
obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
|
2014-06-10 01:59:54 +08:00
|
|
|
obj-$(CONFIG_CRYPTO_DES3_EDE_X86_64) += des3_ede-x86_64.o
|
2012-03-06 02:26:47 +08:00
|
|
|
obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
|
2011-09-02 06:45:22 +08:00
|
|
|
obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
|
2007-10-24 04:37:23 +08:00
|
|
|
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
|
2011-09-26 21:47:25 +08:00
|
|
|
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
|
2007-12-18 00:04:40 +08:00
|
|
|
obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
|
crypto: chacha20 - Add a SSSE3 SIMD variant for x86_64
Implements an x86_64 assembler driver for the ChaCha20 stream cipher. This
single block variant works on a single state matrix using SSE instructions.
It requires SSSE3 due the use of pshufb for efficient 8/16-bit rotate
operations.
For large messages, throughput increases by ~65% compared to
chacha20-generic:
testing speed of chacha20 (chacha20-generic) encryption
test 0 (256 bit key, 16 byte blocks): 45089207 operations in 10 seconds (721427312 bytes)
test 1 (256 bit key, 64 byte blocks): 43839521 operations in 10 seconds (2805729344 bytes)
test 2 (256 bit key, 256 byte blocks): 12702056 operations in 10 seconds (3251726336 bytes)
test 3 (256 bit key, 1024 byte blocks): 3371173 operations in 10 seconds (3452081152 bytes)
test 4 (256 bit key, 8192 byte blocks): 422468 operations in 10 seconds (3460857856 bytes)
testing speed of chacha20 (chacha20-simd) encryption
test 0 (256 bit key, 16 byte blocks): 43141886 operations in 10 seconds (690270176 bytes)
test 1 (256 bit key, 64 byte blocks): 46845874 operations in 10 seconds (2998135936 bytes)
test 2 (256 bit key, 256 byte blocks): 18458512 operations in 10 seconds (4725379072 bytes)
test 3 (256 bit key, 1024 byte blocks): 5360533 operations in 10 seconds (5489185792 bytes)
test 4 (256 bit key, 8192 byte blocks): 692846 operations in 10 seconds (5675794432 bytes)
Benchmark results from a Core i5-4670T.
Signed-off-by: Martin Willi <martin@strongswan.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2015-07-17 01:14:01 +08:00
|
|
|
obj-$(CONFIG_CRYPTO_CHACHA20_X86_64) += chacha20-x86_64.o
|
2011-11-09 22:26:25 +08:00
|
|
|
obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o
|
2009-01-18 13:28:34 +08:00
|
|
|
obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
|
2009-10-19 10:53:06 +08:00
|
|
|
obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
|
2007-10-24 04:37:23 +08:00
|
|
|
|
2008-08-07 09:57:03 +08:00
|
|
|
obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o
|
2011-08-05 02:19:25 +08:00
|
|
|
obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o
|
2013-01-10 22:54:59 +08:00
|
|
|
obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o
|
2013-03-27 04:59:17 +08:00
|
|
|
obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o
|
2013-03-27 05:00:02 +08:00
|
|
|
obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o
|
2013-09-07 10:56:26 +08:00
|
|
|
obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o
|
2008-08-07 09:57:03 +08:00
|
|
|
|
2013-03-24 21:34:07 +08:00
|
|
|
# These modules require assembler to support AVX.
|
|
|
|
ifeq ($(avx_supported),yes)
|
|
|
|
obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64) += \
|
|
|
|
camellia-aesni-avx-x86_64.o
|
|
|
|
obj-$(CONFIG_CRYPTO_CAST5_AVX_X86_64) += cast5-avx-x86_64.o
|
|
|
|
obj-$(CONFIG_CRYPTO_CAST6_AVX_X86_64) += cast6-avx-x86_64.o
|
|
|
|
obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o
|
|
|
|
obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o
|
|
|
|
endif
|
|
|
|
|
2013-04-13 18:46:45 +08:00
|
|
|
# These modules require assembler to support AVX2.
|
|
|
|
ifeq ($(avx2_supported),yes)
|
2013-04-13 18:47:00 +08:00
|
|
|
obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o
|
2013-04-13 18:46:55 +08:00
|
|
|
obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o
|
2014-12-30 08:20:39 +08:00
|
|
|
obj-$(CONFIG_CRYPTO_SHA1_MB) += sha-mb/
|
2013-04-13 18:46:45 +08:00
|
|
|
endif
|
|
|
|
|
2007-11-29 21:15:11 +08:00
|
|
|
aes-i586-y := aes-i586-asm_32.o aes_glue.o
|
2008-01-14 14:07:57 +08:00
|
|
|
twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
|
2007-12-10 15:52:56 +08:00
|
|
|
salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
|
2011-11-09 22:26:31 +08:00
|
|
|
serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o
|
2007-10-24 04:37:23 +08:00
|
|
|
|
2007-11-29 21:15:11 +08:00
|
|
|
aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
|
2014-06-10 01:59:54 +08:00
|
|
|
des3_ede-x86_64-y := des3_ede-asm_64.o des3_ede_glue.o
|
2012-03-06 02:26:47 +08:00
|
|
|
camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
|
2011-09-02 06:45:22 +08:00
|
|
|
blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
|
2008-01-14 14:07:57 +08:00
|
|
|
twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
|
2011-09-26 21:47:25 +08:00
|
|
|
twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
|
2007-12-18 00:04:40 +08:00
|
|
|
salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
|
crypto: chacha20 - Add a SSSE3 SIMD variant for x86_64
Implements an x86_64 assembler driver for the ChaCha20 stream cipher. This
single block variant works on a single state matrix using SSE instructions.
It requires SSSE3 due the use of pshufb for efficient 8/16-bit rotate
operations.
For large messages, throughput increases by ~65% compared to
chacha20-generic:
testing speed of chacha20 (chacha20-generic) encryption
test 0 (256 bit key, 16 byte blocks): 45089207 operations in 10 seconds (721427312 bytes)
test 1 (256 bit key, 64 byte blocks): 43839521 operations in 10 seconds (2805729344 bytes)
test 2 (256 bit key, 256 byte blocks): 12702056 operations in 10 seconds (3251726336 bytes)
test 3 (256 bit key, 1024 byte blocks): 3371173 operations in 10 seconds (3452081152 bytes)
test 4 (256 bit key, 8192 byte blocks): 422468 operations in 10 seconds (3460857856 bytes)
testing speed of chacha20 (chacha20-simd) encryption
test 0 (256 bit key, 16 byte blocks): 43141886 operations in 10 seconds (690270176 bytes)
test 1 (256 bit key, 64 byte blocks): 46845874 operations in 10 seconds (2998135936 bytes)
test 2 (256 bit key, 256 byte blocks): 18458512 operations in 10 seconds (4725379072 bytes)
test 3 (256 bit key, 1024 byte blocks): 5360533 operations in 10 seconds (5489185792 bytes)
test 4 (256 bit key, 8192 byte blocks): 692846 operations in 10 seconds (5675794432 bytes)
Benchmark results from a Core i5-4670T.
Signed-off-by: Martin Willi <martin@strongswan.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2015-07-17 01:14:01 +08:00
|
|
|
chacha20-x86_64-y := chacha20-ssse3-x86_64.o chacha20_glue.o
|
2011-11-09 22:26:25 +08:00
|
|
|
serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
|
2013-03-24 21:34:07 +08:00
|
|
|
|
|
|
|
ifeq ($(avx_supported),yes)
|
|
|
|
camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \
|
|
|
|
camellia_aesni_avx_glue.o
|
|
|
|
cast5-avx-x86_64-y := cast5-avx-x86_64-asm_64.o cast5_avx_glue.o
|
|
|
|
cast6-avx-x86_64-y := cast6-avx-x86_64-asm_64.o cast6_avx_glue.o
|
|
|
|
twofish-avx-x86_64-y := twofish-avx-x86_64-asm_64.o \
|
|
|
|
twofish_avx_glue.o
|
|
|
|
serpent-avx-x86_64-y := serpent-avx-x86_64-asm_64.o \
|
|
|
|
serpent_avx_glue.o
|
|
|
|
endif
|
2009-01-18 13:28:34 +08:00
|
|
|
|
2013-04-13 18:46:45 +08:00
|
|
|
ifeq ($(avx2_supported),yes)
|
2013-04-13 18:47:00 +08:00
|
|
|
camellia-aesni-avx2-y := camellia-aesni-avx2-asm_64.o camellia_aesni_avx2_glue.o
|
crypto: chacha20 - Add an eight block AVX2 variant for x86_64
Extends the x86_64 ChaCha20 implementation by a function processing eight
ChaCha20 blocks in parallel using AVX2.
For large messages, throughput increases by ~55-70% compared to four block
SSSE3:
testing speed of chacha20 (chacha20-simd) encryption
test 0 (256 bit key, 16 byte blocks): 42249230 operations in 10 seconds (675987680 bytes)
test 1 (256 bit key, 64 byte blocks): 46441641 operations in 10 seconds (2972265024 bytes)
test 2 (256 bit key, 256 byte blocks): 33028112 operations in 10 seconds (8455196672 bytes)
test 3 (256 bit key, 1024 byte blocks): 11568759 operations in 10 seconds (11846409216 bytes)
test 4 (256 bit key, 8192 byte blocks): 1448761 operations in 10 seconds (11868250112 bytes)
testing speed of chacha20 (chacha20-simd) encryption
test 0 (256 bit key, 16 byte blocks): 41999675 operations in 10 seconds (671994800 bytes)
test 1 (256 bit key, 64 byte blocks): 45805908 operations in 10 seconds (2931578112 bytes)
test 2 (256 bit key, 256 byte blocks): 32814947 operations in 10 seconds (8400626432 bytes)
test 3 (256 bit key, 1024 byte blocks): 19777167 operations in 10 seconds (20251819008 bytes)
test 4 (256 bit key, 8192 byte blocks): 2279321 operations in 10 seconds (18672197632 bytes)
Benchmark results from a Core i5-4670T.
Signed-off-by: Martin Willi <martin@strongswan.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2015-07-17 01:14:03 +08:00
|
|
|
chacha20-x86_64-y += chacha20-avx2-x86_64.o
|
2013-04-13 18:46:55 +08:00
|
|
|
serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o
|
2013-04-13 18:46:45 +08:00
|
|
|
endif
|
|
|
|
|
2013-12-30 21:52:24 +08:00
|
|
|
aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
|
crypto: aes - AES CTR x86_64 "by8" AVX optimization
This patch introduces "by8" AES CTR mode AVX optimization inspired by
Intel Optimized IPSEC Cryptograhpic library. For additional information,
please see:
http://downloadcenter.intel.com/Detail_Desc.aspx?agr=Y&DwnldID=22972
The functions aes_ctr_enc_128_avx_by8(), aes_ctr_enc_192_avx_by8() and
aes_ctr_enc_256_avx_by8() are adapted from
Intel Optimized IPSEC Cryptographic library. When both AES and AVX features
are enabled in a platform, the glue code in AESNI module overrieds the
existing "by4" CTR mode en/decryption with the "by8"
AES CTR mode en/decryption.
On a Haswell desktop, with turbo disabled and all cpus running
at maximum frequency, the "by8" CTR mode optimization
shows better performance results across data & key sizes
as measured by tcrypt.
The average performance improvement of the "by8" version over the "by4"
version is as follows:
For 128 bit key and data sizes >= 256 bytes, there is a 10-16% improvement.
For 192 bit key and data sizes >= 256 bytes, there is a 20-22% improvement.
For 256 bit key and data sizes >= 256 bytes, there is a 20-25% improvement.
A typical run of tcrypt with AES CTR mode encryption of the "by4" and "by8"
optimization shows the following results:
tcrypt with "by4" AES CTR mode encryption optimization on a Haswell Desktop:
---------------------------------------------------------------------------
testing speed of __ctr-aes-aesni encryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 343 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 336 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 491 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 1130 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 7309 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 346 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 361 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 543 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 1321 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 9649 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 369 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 366 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 595 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 1531 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 10522 cycles (8192 bytes)
testing speed of __ctr-aes-aesni decryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 336 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 350 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 487 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 1129 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 7287 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 350 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 359 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 635 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 1324 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 9595 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 364 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 377 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 604 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 1527 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 10549 cycles (8192 bytes)
tcrypt with "by8" AES CTR mode encryption optimization on a Haswell Desktop:
---------------------------------------------------------------------------
testing speed of __ctr-aes-aesni encryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 340 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 330 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 450 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 1043 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 6597 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 339 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 352 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 539 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 1153 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 8458 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 353 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 360 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 512 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 1277 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 8745 cycles (8192 bytes)
testing speed of __ctr-aes-aesni decryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 348 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 335 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 451 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 1030 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 6611 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 354 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 346 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 488 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 1154 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 8390 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 357 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 362 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 515 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 1284 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 8681 cycles (8192 bytes)
crypto: Incorporate feed back to AES CTR mode optimization patch
Specifically, the following:
a) alignment around main loop in aes_ctrby8_avx_x86_64.S
b) .rodata around data constants used in the assembely code.
c) the use of CONFIG_AVX in the glue code.
d) fix up white space.
e) informational message for "by8" AES CTR mode optimization
f) "by8" AES CTR mode optimization can be simply enabled
if the platform supports both AES and AVX features. The
optimization works superbly on Sandybridge as well.
Testing on Haswell shows no performance change since the last.
Testing on Sandybridge shows that the "by8" AES CTR mode optimization
greatly improves performance.
tcrypt log with "by4" AES CTR mode optimization on Sandybridge
--------------------------------------------------------------
testing speed of __ctr-aes-aesni encryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 383 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 408 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 707 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 1864 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 12813 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 395 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 432 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 780 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 2132 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 15765 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 416 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 438 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 842 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 2383 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 16945 cycles (8192 bytes)
testing speed of __ctr-aes-aesni decryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 389 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 409 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 704 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 1865 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 12783 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 409 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 434 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 792 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 2151 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 15804 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 421 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 444 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 840 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 2394 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 16928 cycles (8192 bytes)
tcrypt log with "by8" AES CTR mode optimization on Sandybridge
--------------------------------------------------------------
testing speed of __ctr-aes-aesni encryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 383 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 401 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 522 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 1136 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 7046 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 394 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 418 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 559 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 1263 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 9072 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 408 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 428 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 595 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 1385 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 9224 cycles (8192 bytes)
testing speed of __ctr-aes-aesni decryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 390 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 402 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 530 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 1135 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 7079 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 414 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 417 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 572 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 1312 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 9073 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 415 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 454 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 598 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 1407 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 9288 cycles (8192 bytes)
crypto: Fix redundant checks
a) Fix the redundant check for cpu_has_aes
b) Fix the key length check when invoking the CTR mode "by8"
encryptor/decryptor.
crypto: fix typo in AES ctr mode transform
Signed-off-by: Chandramouli Narayanan <mouli@linux.intel.com>
Reviewed-by: Mathias Krause <minipli@googlemail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2014-06-11 00:22:47 +08:00
|
|
|
aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o aes_ctrby8_avx-x86_64.o
|
2009-10-19 10:53:06 +08:00
|
|
|
ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
|
2011-08-05 02:19:25 +08:00
|
|
|
sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
|
2014-03-21 06:14:00 +08:00
|
|
|
ifeq ($(avx2_supported),yes)
|
|
|
|
sha1-ssse3-y += sha1_avx2_x86_64_asm.o
|
|
|
|
endif
|
2012-09-28 06:44:17 +08:00
|
|
|
crc32c-intel-y := crc32c-intel_glue.o
|
2013-02-26 17:52:15 +08:00
|
|
|
crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o
|
2013-01-10 22:54:59 +08:00
|
|
|
crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o
|
2013-03-27 04:59:17 +08:00
|
|
|
sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o
|
2013-03-27 05:00:02 +08:00
|
|
|
sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o
|
2013-09-07 10:56:26 +08:00
|
|
|
crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o
|