crypto: aesni_intel - improve lrw and xts performance by utilizing parallel AES-NI hardware pipelines

Use parallel LRW and XTS encryption facilities to better utilize AES-NI
hardware pipelines and gain extra performance.

Tcrypt benchmark results (async), old vs new ratios:

Intel Core i5-2450M CPU (fam: 6, model: 42, step: 7)

aes:128bit
        lrw:256bit      xts:256bit
size    lrw-enc lrw-dec xts-dec xts-dec
16B     0.99x   1.00x   1.22x   1.19x
64B     1.38x   1.50x   1.58x   1.61x
256B    2.04x   2.02x   2.27x   2.29x
1024B   2.56x   2.54x   2.89x   2.92x
8192B   2.85x   2.99x   3.40x   3.23x

aes:192bit
        lrw:320bit      xts:384bit
size    lrw-enc lrw-dec xts-dec xts-dec
16B     1.08x   1.08x   1.16x   1.17x
64B     1.48x   1.54x   1.59x   1.65x
256B    2.18x   2.17x   2.29x   2.28x
1024B   2.67x   2.67x   2.87x   3.05x
8192B   2.93x   2.84x   3.28x   3.33x

aes:256bit
        lrw:348bit      xts:512bit
size    lrw-enc lrw-dec xts-dec xts-dec
16B     1.07x   1.07x   1.18x   1.19x
64B     1.56x   1.56x   1.70x   1.71x
256B    2.22x   2.24x   2.46x   2.46x
1024B   2.76x   2.77x   3.13x   3.05x
8192B   2.99x   3.05x   3.40x   3.30x

Cc: Huang Ying <ying.huang@intel.com>
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Reviewed-by: Kim Phillips <kim.phillips@freescale.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
This commit is contained in:
Jussi Kivilinna 2012-07-22 18:18:37 +03:00 committed by Herbert Xu
parent 35a1fc1873
commit 023af60825
2 changed files with 231 additions and 46 deletions

View File

@ -28,6 +28,9 @@
#include <crypto/aes.h> #include <crypto/aes.h>
#include <crypto/cryptd.h> #include <crypto/cryptd.h>
#include <crypto/ctr.h> #include <crypto/ctr.h>
#include <crypto/b128ops.h>
#include <crypto/lrw.h>
#include <crypto/xts.h>
#include <asm/cpu_device_id.h> #include <asm/cpu_device_id.h>
#include <asm/i387.h> #include <asm/i387.h>
#include <asm/crypto/aes.h> #include <asm/crypto/aes.h>
@ -41,18 +44,10 @@
#define HAS_CTR #define HAS_CTR
#endif #endif
#if defined(CONFIG_CRYPTO_LRW) || defined(CONFIG_CRYPTO_LRW_MODULE)
#define HAS_LRW
#endif
#if defined(CONFIG_CRYPTO_PCBC) || defined(CONFIG_CRYPTO_PCBC_MODULE) #if defined(CONFIG_CRYPTO_PCBC) || defined(CONFIG_CRYPTO_PCBC_MODULE)
#define HAS_PCBC #define HAS_PCBC
#endif #endif
#if defined(CONFIG_CRYPTO_XTS) || defined(CONFIG_CRYPTO_XTS_MODULE)
#define HAS_XTS
#endif
/* This data is stored at the end of the crypto_tfm struct. /* This data is stored at the end of the crypto_tfm struct.
* It's a type of per "session" data storage location. * It's a type of per "session" data storage location.
* This needs to be 16 byte aligned. * This needs to be 16 byte aligned.
@ -79,6 +74,16 @@ struct aesni_hash_subkey_req_data {
#define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1)) #define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1))
#define RFC4106_HASH_SUBKEY_SIZE 16 #define RFC4106_HASH_SUBKEY_SIZE 16
struct aesni_lrw_ctx {
struct lrw_table_ctx lrw_table;
u8 raw_aes_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1];
};
struct aesni_xts_ctx {
u8 raw_tweak_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1];
u8 raw_crypt_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1];
};
asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
unsigned int key_len); unsigned int key_len);
asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *out, asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *out,
@ -398,13 +403,6 @@ static int ablk_rfc3686_ctr_init(struct crypto_tfm *tfm)
#endif #endif
#endif #endif
#ifdef HAS_LRW
static int ablk_lrw_init(struct crypto_tfm *tfm)
{
return ablk_init_common(tfm, "fpu(lrw(__driver-aes-aesni))");
}
#endif
#ifdef HAS_PCBC #ifdef HAS_PCBC
static int ablk_pcbc_init(struct crypto_tfm *tfm) static int ablk_pcbc_init(struct crypto_tfm *tfm)
{ {
@ -412,12 +410,160 @@ static int ablk_pcbc_init(struct crypto_tfm *tfm)
} }
#endif #endif
#ifdef HAS_XTS static void lrw_xts_encrypt_callback(void *ctx, u8 *blks, unsigned int nbytes)
static int ablk_xts_init(struct crypto_tfm *tfm)
{ {
return ablk_init_common(tfm, "fpu(xts(__driver-aes-aesni))"); aesni_ecb_enc(ctx, blks, blks, nbytes);
}
static void lrw_xts_decrypt_callback(void *ctx, u8 *blks, unsigned int nbytes)
{
aesni_ecb_dec(ctx, blks, blks, nbytes);
}
static int lrw_aesni_setkey(struct crypto_tfm *tfm, const u8 *key,
unsigned int keylen)
{
struct aesni_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
int err;
err = aes_set_key_common(tfm, ctx->raw_aes_ctx, key,
keylen - AES_BLOCK_SIZE);
if (err)
return err;
return lrw_init_table(&ctx->lrw_table, key + keylen - AES_BLOCK_SIZE);
}
static void lrw_aesni_exit_tfm(struct crypto_tfm *tfm)
{
struct aesni_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
lrw_free_table(&ctx->lrw_table);
}
static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct aesni_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[8];
struct lrw_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.table_ctx = &ctx->lrw_table,
.crypt_ctx = aes_ctx(ctx->raw_aes_ctx),
.crypt_fn = lrw_xts_encrypt_callback,
};
int ret;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
kernel_fpu_begin();
ret = lrw_crypt(desc, dst, src, nbytes, &req);
kernel_fpu_end();
return ret;
}
static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct aesni_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[8];
struct lrw_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.table_ctx = &ctx->lrw_table,
.crypt_ctx = aes_ctx(ctx->raw_aes_ctx),
.crypt_fn = lrw_xts_decrypt_callback,
};
int ret;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
kernel_fpu_begin();
ret = lrw_crypt(desc, dst, src, nbytes, &req);
kernel_fpu_end();
return ret;
}
static int xts_aesni_setkey(struct crypto_tfm *tfm, const u8 *key,
unsigned int keylen)
{
struct aesni_xts_ctx *ctx = crypto_tfm_ctx(tfm);
u32 *flags = &tfm->crt_flags;
int err;
/* key consists of keys of equal size concatenated, therefore
* the length must be even
*/
if (keylen % 2) {
*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
return -EINVAL;
}
/* first half of xts-key is for crypt */
err = aes_set_key_common(tfm, ctx->raw_crypt_ctx, key, keylen / 2);
if (err)
return err;
/* second half of xts-key is for tweak */
return aes_set_key_common(tfm, ctx->raw_tweak_ctx, key + keylen / 2,
keylen / 2);
}
static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[8];
struct xts_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.tweak_ctx = aes_ctx(ctx->raw_tweak_ctx),
.tweak_fn = XTS_TWEAK_CAST(aesni_enc),
.crypt_ctx = aes_ctx(ctx->raw_crypt_ctx),
.crypt_fn = lrw_xts_encrypt_callback,
};
int ret;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
kernel_fpu_begin();
ret = xts_crypt(desc, dst, src, nbytes, &req);
kernel_fpu_end();
return ret;
}
static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[8];
struct xts_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.tweak_ctx = aes_ctx(ctx->raw_tweak_ctx),
.tweak_fn = XTS_TWEAK_CAST(aesni_enc),
.crypt_ctx = aes_ctx(ctx->raw_crypt_ctx),
.crypt_fn = lrw_xts_decrypt_callback,
};
int ret;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
kernel_fpu_begin();
ret = xts_crypt(desc, dst, src, nbytes, &req);
kernel_fpu_end();
return ret;
} }
#endif
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
static int rfc4106_init(struct crypto_tfm *tfm) static int rfc4106_init(struct crypto_tfm *tfm)
@ -1035,30 +1181,6 @@ static struct crypto_alg aesni_algs[] = { {
}, },
#endif #endif
#endif #endif
#ifdef HAS_LRW
}, {
.cra_name = "lrw(aes)",
.cra_driver_name = "lrw-aes-aesni",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_lrw_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = AES_MIN_KEY_SIZE + AES_BLOCK_SIZE,
.max_keysize = AES_MAX_KEY_SIZE + AES_BLOCK_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
#endif
#ifdef HAS_PCBC #ifdef HAS_PCBC
}, { }, {
.cra_name = "pcbc(aes)", .cra_name = "pcbc(aes)",
@ -1083,7 +1205,69 @@ static struct crypto_alg aesni_algs[] = { {
}, },
}, },
#endif #endif
#ifdef HAS_XTS }, {
.cra_name = "__lrw-aes-aesni",
.cra_driver_name = "__driver-lrw-aes-aesni",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct aesni_lrw_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_exit = lrw_aesni_exit_tfm,
.cra_u = {
.blkcipher = {
.min_keysize = AES_MIN_KEY_SIZE + AES_BLOCK_SIZE,
.max_keysize = AES_MAX_KEY_SIZE + AES_BLOCK_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = lrw_aesni_setkey,
.encrypt = lrw_encrypt,
.decrypt = lrw_decrypt,
},
},
}, {
.cra_name = "__xts-aes-aesni",
.cra_driver_name = "__driver-xts-aes-aesni",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct aesni_xts_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = 2 * AES_MIN_KEY_SIZE,
.max_keysize = 2 * AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = xts_aesni_setkey,
.encrypt = xts_encrypt,
.decrypt = xts_decrypt,
},
},
}, {
.cra_name = "lrw(aes)",
.cra_driver_name = "lrw-aes-aesni",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = AES_MIN_KEY_SIZE + AES_BLOCK_SIZE,
.max_keysize = AES_MAX_KEY_SIZE + AES_BLOCK_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
}, { }, {
.cra_name = "xts(aes)", .cra_name = "xts(aes)",
.cra_driver_name = "xts-aes-aesni", .cra_driver_name = "xts-aes-aesni",
@ -1094,7 +1278,7 @@ static struct crypto_alg aesni_algs[] = { {
.cra_alignmask = 0, .cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type, .cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE, .cra_module = THIS_MODULE,
.cra_init = ablk_xts_init, .cra_init = ablk_init,
.cra_exit = ablk_exit, .cra_exit = ablk_exit,
.cra_u = { .cra_u = {
.ablkcipher = { .ablkcipher = {
@ -1106,7 +1290,6 @@ static struct crypto_alg aesni_algs[] = { {
.decrypt = ablk_decrypt, .decrypt = ablk_decrypt,
}, },
}, },
#endif
} }; } };

View File

@ -564,6 +564,8 @@ config CRYPTO_AES_NI_INTEL
select CRYPTO_CRYPTD select CRYPTO_CRYPTD
select CRYPTO_ABLK_HELPER_X86 select CRYPTO_ABLK_HELPER_X86
select CRYPTO_ALGAPI select CRYPTO_ALGAPI
select CRYPTO_LRW
select CRYPTO_XTS
help help
Use Intel AES-NI instructions for AES algorithm. Use Intel AES-NI instructions for AES algorithm.