OpenCloudOS-Kernel/drivers/crypto/talitos.c

3526 lines
101 KiB
C
Raw Normal View History

/*
* talitos - Freescale Integrated Security Engine (SEC) device driver
*
* Copyright (c) 2008-2011 Freescale Semiconductor, Inc.
*
* Scatterlist Crypto API glue code copied from files with the following:
* Copyright (c) 2006-2007 Herbert Xu <herbert@gondor.apana.org.au>
*
* Crypto algorithm registration code copied from hifn driver:
* 2007+ Copyright (c) Evgeniy Polyakov <johnpol@2ka.mipt.ru>
* All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mod_devicetable.h>
#include <linux/device.h>
#include <linux/interrupt.h>
#include <linux/crypto.h>
#include <linux/hw_random.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
#include <linux/of_platform.h>
#include <linux/dma-mapping.h>
#include <linux/io.h>
#include <linux/spinlock.h>
#include <linux/rtnetlink.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 16:04:11 +08:00
#include <linux/slab.h>
#include <crypto/algapi.h>
#include <crypto/aes.h>
#include <crypto/des.h>
#include <crypto/sha.h>
#include <crypto/md5.h>
#include <crypto/internal/aead.h>
#include <crypto/authenc.h>
#include <crypto/skcipher.h>
#include <crypto/hash.h>
#include <crypto/internal/hash.h>
#include <crypto/scatterwalk.h>
#include "talitos.h"
static void to_talitos_ptr(struct talitos_ptr *ptr, dma_addr_t dma_addr,
unsigned int len, bool is_sec1)
{
ptr->ptr = cpu_to_be32(lower_32_bits(dma_addr));
if (is_sec1) {
ptr->len1 = cpu_to_be16(len);
} else {
ptr->len = cpu_to_be16(len);
ptr->eptr = upper_32_bits(dma_addr);
}
}
static void copy_talitos_ptr(struct talitos_ptr *dst_ptr,
struct talitos_ptr *src_ptr, bool is_sec1)
{
dst_ptr->ptr = src_ptr->ptr;
if (is_sec1) {
dst_ptr->len1 = src_ptr->len1;
} else {
dst_ptr->len = src_ptr->len;
dst_ptr->eptr = src_ptr->eptr;
}
}
static unsigned short from_talitos_ptr_len(struct talitos_ptr *ptr,
bool is_sec1)
{
if (is_sec1)
return be16_to_cpu(ptr->len1);
else
return be16_to_cpu(ptr->len);
}
static void to_talitos_ptr_ext_set(struct talitos_ptr *ptr, u8 val,
bool is_sec1)
{
if (!is_sec1)
ptr->j_extent = val;
}
static void to_talitos_ptr_ext_or(struct talitos_ptr *ptr, u8 val, bool is_sec1)
{
if (!is_sec1)
ptr->j_extent |= val;
}
/*
* map virtual single (contiguous) pointer to h/w descriptor pointer
*/
static void map_single_talitos_ptr(struct device *dev,
struct talitos_ptr *ptr,
unsigned int len, void *data,
enum dma_data_direction dir)
{
dma_addr_t dma_addr = dma_map_single(dev, data, len, dir);
struct talitos_private *priv = dev_get_drvdata(dev);
bool is_sec1 = has_ftr_sec1(priv);
to_talitos_ptr(ptr, dma_addr, len, is_sec1);
}
/*
* unmap bus single (contiguous) h/w descriptor pointer
*/
static void unmap_single_talitos_ptr(struct device *dev,
struct talitos_ptr *ptr,
enum dma_data_direction dir)
{
struct talitos_private *priv = dev_get_drvdata(dev);
bool is_sec1 = has_ftr_sec1(priv);
dma_unmap_single(dev, be32_to_cpu(ptr->ptr),
from_talitos_ptr_len(ptr, is_sec1), dir);
}
static int reset_channel(struct device *dev, int ch)
{
struct talitos_private *priv = dev_get_drvdata(dev);
unsigned int timeout = TALITOS_TIMEOUT;
bool is_sec1 = has_ftr_sec1(priv);
if (is_sec1) {
setbits32(priv->chan[ch].reg + TALITOS_CCCR_LO,
TALITOS1_CCCR_LO_RESET);
while ((in_be32(priv->chan[ch].reg + TALITOS_CCCR_LO) &
TALITOS1_CCCR_LO_RESET) && --timeout)
cpu_relax();
} else {
setbits32(priv->chan[ch].reg + TALITOS_CCCR,
TALITOS2_CCCR_RESET);
while ((in_be32(priv->chan[ch].reg + TALITOS_CCCR) &
TALITOS2_CCCR_RESET) && --timeout)
cpu_relax();
}
if (timeout == 0) {
dev_err(dev, "failed to reset channel %d\n", ch);
return -EIO;
}
/* set 36-bit addressing, done writeback enable and done IRQ enable */
setbits32(priv->chan[ch].reg + TALITOS_CCCR_LO, TALITOS_CCCR_LO_EAE |
TALITOS_CCCR_LO_CDWE | TALITOS_CCCR_LO_CDIE);
crypto: talitos - chain in buffered data for ahash on SEC1 SEC1 doesn't support S/G in descriptors so for hash operations, the CPU has to build a buffer containing the buffered block and the incoming data. This generates a lot of memory copies which represents more than 50% of CPU time of a md5sum operation as shown below with a 'perf record'. |--86.24%-- kcapi_md_digest | | | |--86.18%-- _kcapi_common_vmsplice_chunk_fd | | | | | |--83.68%-- splice | | | | | | | |--83.59%-- ret_from_syscall | | | | | | | | | |--83.52%-- sys_splice | | | | | | | | | | | |--83.49%-- splice_from_pipe | | | | | | | | | | | | | |--83.04%-- __splice_from_pipe | | | | | | | | | | | | | | | |--80.67%-- pipe_to_sendpage | | | | | | | | | | | | | | | | | |--78.25%-- hash_sendpage | | | | | | | | | | | | | | | | | | | |--60.08%-- ahash_process_req | | | | | | | | | | | | | | | | | | | | | |--56.36%-- sg_copy_buffer | | | | | | | | | | | | | | | | | | | | | | | |--55.29%-- memcpy | | | | | | | | | | | | However, unlike SEC2+, SEC1 offers the possibility to chain descriptors. It is therefore possible to build a first descriptor pointing to the buffered data and a second descriptor pointing to the incoming data, hence avoiding the memory copy to a single buffer. With this patch, the time necessary for a md5sum on a 90Mbytes file is approximately 3 seconds. Without the patch it takes 6 seconds. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2017-10-06 21:05:06 +08:00
/* enable chaining descriptors */
if (is_sec1)
setbits32(priv->chan[ch].reg + TALITOS_CCCR_LO,
TALITOS_CCCR_LO_NE);
/* and ICCR writeback, if available */
if (priv->features & TALITOS_FTR_HW_AUTH_CHECK)
setbits32(priv->chan[ch].reg + TALITOS_CCCR_LO,
TALITOS_CCCR_LO_IWSE);
return 0;
}
static int reset_device(struct device *dev)
{
struct talitos_private *priv = dev_get_drvdata(dev);
unsigned int timeout = TALITOS_TIMEOUT;
bool is_sec1 = has_ftr_sec1(priv);
u32 mcr = is_sec1 ? TALITOS1_MCR_SWR : TALITOS2_MCR_SWR;
setbits32(priv->reg + TALITOS_MCR, mcr);
while ((in_be32(priv->reg + TALITOS_MCR) & mcr)
&& --timeout)
cpu_relax();
if (priv->irq[1]) {
mcr = TALITOS_MCR_RCA1 | TALITOS_MCR_RCA3;
setbits32(priv->reg + TALITOS_MCR, mcr);
}
if (timeout == 0) {
dev_err(dev, "failed to reset device\n");
return -EIO;
}
return 0;
}
/*
* Reset and initialize the device
*/
static int init_device(struct device *dev)
{
struct talitos_private *priv = dev_get_drvdata(dev);
int ch, err;
bool is_sec1 = has_ftr_sec1(priv);
/*
* Master reset
* errata documentation: warning: certain SEC interrupts
* are not fully cleared by writing the MCR:SWR bit,
* set bit twice to completely reset
*/
err = reset_device(dev);
if (err)
return err;
err = reset_device(dev);
if (err)
return err;
/* reset channels */
for (ch = 0; ch < priv->num_channels; ch++) {
err = reset_channel(dev, ch);
if (err)
return err;
}
/* enable channel done and error interrupts */
if (is_sec1) {
clrbits32(priv->reg + TALITOS_IMR, TALITOS1_IMR_INIT);
clrbits32(priv->reg + TALITOS_IMR_LO, TALITOS1_IMR_LO_INIT);
/* disable parity error check in DEU (erroneous? test vect.) */
setbits32(priv->reg_deu + TALITOS_EUICR, TALITOS1_DEUICR_KPE);
} else {
setbits32(priv->reg + TALITOS_IMR, TALITOS2_IMR_INIT);
setbits32(priv->reg + TALITOS_IMR_LO, TALITOS2_IMR_LO_INIT);
}
/* disable integrity check error interrupts (use writeback instead) */
if (priv->features & TALITOS_FTR_HW_AUTH_CHECK)
setbits32(priv->reg_mdeu + TALITOS_EUICR_LO,
TALITOS_MDEUICR_LO_ICE);
return 0;
}
/**
* talitos_submit - submits a descriptor to the device for processing
* @dev: the SEC device to be used
* @ch: the SEC device channel to be used
* @desc: the descriptor to be processed by the device
* @callback: whom to call when processing is complete
* @context: a handle for use by caller (optional)
*
* desc must contain valid dma-mapped (bus physical) address pointers.
* callback must check err and feedback in descriptor header
* for device processing status.
*/
int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc,
void (*callback)(struct device *dev,
struct talitos_desc *desc,
void *context, int error),
void *context)
{
struct talitos_private *priv = dev_get_drvdata(dev);
struct talitos_request *request;
unsigned long flags;
int head;
bool is_sec1 = has_ftr_sec1(priv);
spin_lock_irqsave(&priv->chan[ch].head_lock, flags);
if (!atomic_inc_not_zero(&priv->chan[ch].submit_count)) {
/* h/w fifo is full */
spin_unlock_irqrestore(&priv->chan[ch].head_lock, flags);
return -EAGAIN;
}
head = priv->chan[ch].head;
request = &priv->chan[ch].fifo[head];
/* map descriptor and save caller data */
if (is_sec1) {
desc->hdr1 = desc->hdr;
request->dma_desc = dma_map_single(dev, &desc->hdr1,
TALITOS_DESC_SIZE,
DMA_BIDIRECTIONAL);
} else {
request->dma_desc = dma_map_single(dev, desc,
TALITOS_DESC_SIZE,
DMA_BIDIRECTIONAL);
}
request->callback = callback;
request->context = context;
/* increment fifo head */
priv->chan[ch].head = (priv->chan[ch].head + 1) & (priv->fifo_len - 1);
smp_wmb();
request->desc = desc;
/* GO! */
wmb();
out_be32(priv->chan[ch].reg + TALITOS_FF,
upper_32_bits(request->dma_desc));
out_be32(priv->chan[ch].reg + TALITOS_FF_LO,
lower_32_bits(request->dma_desc));
spin_unlock_irqrestore(&priv->chan[ch].head_lock, flags);
return -EINPROGRESS;
}
EXPORT_SYMBOL(talitos_submit);
/*
* process what was done, notify callback of error if not
*/
static void flush_channel(struct device *dev, int ch, int error, int reset_ch)
{
struct talitos_private *priv = dev_get_drvdata(dev);
struct talitos_request *request, saved_req;
unsigned long flags;
int tail, status;
bool is_sec1 = has_ftr_sec1(priv);
spin_lock_irqsave(&priv->chan[ch].tail_lock, flags);
tail = priv->chan[ch].tail;
while (priv->chan[ch].fifo[tail].desc) {
__be32 hdr;
request = &priv->chan[ch].fifo[tail];
/* descriptors with their done bits set don't get the error */
rmb();
crypto: talitos - chain in buffered data for ahash on SEC1 SEC1 doesn't support S/G in descriptors so for hash operations, the CPU has to build a buffer containing the buffered block and the incoming data. This generates a lot of memory copies which represents more than 50% of CPU time of a md5sum operation as shown below with a 'perf record'. |--86.24%-- kcapi_md_digest | | | |--86.18%-- _kcapi_common_vmsplice_chunk_fd | | | | | |--83.68%-- splice | | | | | | | |--83.59%-- ret_from_syscall | | | | | | | | | |--83.52%-- sys_splice | | | | | | | | | | | |--83.49%-- splice_from_pipe | | | | | | | | | | | | | |--83.04%-- __splice_from_pipe | | | | | | | | | | | | | | | |--80.67%-- pipe_to_sendpage | | | | | | | | | | | | | | | | | |--78.25%-- hash_sendpage | | | | | | | | | | | | | | | | | | | |--60.08%-- ahash_process_req | | | | | | | | | | | | | | | | | | | | | |--56.36%-- sg_copy_buffer | | | | | | | | | | | | | | | | | | | | | | | |--55.29%-- memcpy | | | | | | | | | | | | However, unlike SEC2+, SEC1 offers the possibility to chain descriptors. It is therefore possible to build a first descriptor pointing to the buffered data and a second descriptor pointing to the incoming data, hence avoiding the memory copy to a single buffer. With this patch, the time necessary for a md5sum on a 90Mbytes file is approximately 3 seconds. Without the patch it takes 6 seconds. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2017-10-06 21:05:06 +08:00
if (!is_sec1)
hdr = request->desc->hdr;
else if (request->desc->next_desc)
hdr = (request->desc + 1)->hdr1;
else
hdr = request->desc->hdr1;
if ((hdr & DESC_HDR_DONE) == DESC_HDR_DONE)
status = 0;
else
if (!error)
break;
else
status = error;
dma_unmap_single(dev, request->dma_desc,
TALITOS_DESC_SIZE,
DMA_BIDIRECTIONAL);
/* copy entries so we can call callback outside lock */
saved_req.desc = request->desc;
saved_req.callback = request->callback;
saved_req.context = request->context;
/* release request entry in fifo */
smp_wmb();
request->desc = NULL;
/* increment fifo tail */
priv->chan[ch].tail = (tail + 1) & (priv->fifo_len - 1);
spin_unlock_irqrestore(&priv->chan[ch].tail_lock, flags);
atomic_dec(&priv->chan[ch].submit_count);
saved_req.callback(dev, saved_req.desc, saved_req.context,
status);
/* channel may resume processing in single desc error case */
if (error && !reset_ch && status == error)
return;
spin_lock_irqsave(&priv->chan[ch].tail_lock, flags);
tail = priv->chan[ch].tail;
}
spin_unlock_irqrestore(&priv->chan[ch].tail_lock, flags);
}
/*
* process completed requests for channels that have done status
*/
#define DEF_TALITOS1_DONE(name, ch_done_mask) \
static void talitos1_done_##name(unsigned long data) \
{ \
struct device *dev = (struct device *)data; \
struct talitos_private *priv = dev_get_drvdata(dev); \
unsigned long flags; \
\
if (ch_done_mask & 0x10000000) \
flush_channel(dev, 0, 0, 0); \
if (ch_done_mask & 0x40000000) \
flush_channel(dev, 1, 0, 0); \
if (ch_done_mask & 0x00010000) \
flush_channel(dev, 2, 0, 0); \
if (ch_done_mask & 0x00040000) \
flush_channel(dev, 3, 0, 0); \
\
/* At this point, all completed channels have been processed */ \
/* Unmask done interrupts for channels completed later on. */ \
spin_lock_irqsave(&priv->reg_lock, flags); \
clrbits32(priv->reg + TALITOS_IMR, ch_done_mask); \
clrbits32(priv->reg + TALITOS_IMR_LO, TALITOS1_IMR_LO_INIT); \
spin_unlock_irqrestore(&priv->reg_lock, flags); \
}
DEF_TALITOS1_DONE(4ch, TALITOS1_ISR_4CHDONE)
DEF_TALITOS1_DONE(ch0, TALITOS1_ISR_CH_0_DONE)
#define DEF_TALITOS2_DONE(name, ch_done_mask) \
static void talitos2_done_##name(unsigned long data) \
{ \
struct device *dev = (struct device *)data; \
struct talitos_private *priv = dev_get_drvdata(dev); \
unsigned long flags; \
\
if (ch_done_mask & 1) \
flush_channel(dev, 0, 0, 0); \
if (ch_done_mask & (1 << 2)) \
flush_channel(dev, 1, 0, 0); \
if (ch_done_mask & (1 << 4)) \
flush_channel(dev, 2, 0, 0); \
if (ch_done_mask & (1 << 6)) \
flush_channel(dev, 3, 0, 0); \
\
/* At this point, all completed channels have been processed */ \
/* Unmask done interrupts for channels completed later on. */ \
spin_lock_irqsave(&priv->reg_lock, flags); \
setbits32(priv->reg + TALITOS_IMR, ch_done_mask); \
setbits32(priv->reg + TALITOS_IMR_LO, TALITOS2_IMR_LO_INIT); \
spin_unlock_irqrestore(&priv->reg_lock, flags); \
}
DEF_TALITOS2_DONE(4ch, TALITOS2_ISR_4CHDONE)
DEF_TALITOS2_DONE(ch0, TALITOS2_ISR_CH_0_DONE)
DEF_TALITOS2_DONE(ch0_2, TALITOS2_ISR_CH_0_2_DONE)
DEF_TALITOS2_DONE(ch1_3, TALITOS2_ISR_CH_1_3_DONE)
/*
* locate current (offending) descriptor
*/
static u32 current_desc_hdr(struct device *dev, int ch)
{
struct talitos_private *priv = dev_get_drvdata(dev);
int tail, iter;
dma_addr_t cur_desc;
cur_desc = ((u64)in_be32(priv->chan[ch].reg + TALITOS_CDPR)) << 32;
cur_desc |= in_be32(priv->chan[ch].reg + TALITOS_CDPR_LO);
if (!cur_desc) {
dev_err(dev, "CDPR is NULL, giving up search for offending descriptor\n");
return 0;
}
tail = priv->chan[ch].tail;
iter = tail;
crypto: talitos - chain in buffered data for ahash on SEC1 SEC1 doesn't support S/G in descriptors so for hash operations, the CPU has to build a buffer containing the buffered block and the incoming data. This generates a lot of memory copies which represents more than 50% of CPU time of a md5sum operation as shown below with a 'perf record'. |--86.24%-- kcapi_md_digest | | | |--86.18%-- _kcapi_common_vmsplice_chunk_fd | | | | | |--83.68%-- splice | | | | | | | |--83.59%-- ret_from_syscall | | | | | | | | | |--83.52%-- sys_splice | | | | | | | | | | | |--83.49%-- splice_from_pipe | | | | | | | | | | | | | |--83.04%-- __splice_from_pipe | | | | | | | | | | | | | | | |--80.67%-- pipe_to_sendpage | | | | | | | | | | | | | | | | | |--78.25%-- hash_sendpage | | | | | | | | | | | | | | | | | | | |--60.08%-- ahash_process_req | | | | | | | | | | | | | | | | | | | | | |--56.36%-- sg_copy_buffer | | | | | | | | | | | | | | | | | | | | | | | |--55.29%-- memcpy | | | | | | | | | | | | However, unlike SEC2+, SEC1 offers the possibility to chain descriptors. It is therefore possible to build a first descriptor pointing to the buffered data and a second descriptor pointing to the incoming data, hence avoiding the memory copy to a single buffer. With this patch, the time necessary for a md5sum on a 90Mbytes file is approximately 3 seconds. Without the patch it takes 6 seconds. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2017-10-06 21:05:06 +08:00
while (priv->chan[ch].fifo[iter].dma_desc != cur_desc &&
priv->chan[ch].fifo[iter].desc->next_desc != cur_desc) {
iter = (iter + 1) & (priv->fifo_len - 1);
if (iter == tail) {
dev_err(dev, "couldn't locate current descriptor\n");
return 0;
}
}
crypto: talitos - chain in buffered data for ahash on SEC1 SEC1 doesn't support S/G in descriptors so for hash operations, the CPU has to build a buffer containing the buffered block and the incoming data. This generates a lot of memory copies which represents more than 50% of CPU time of a md5sum operation as shown below with a 'perf record'. |--86.24%-- kcapi_md_digest | | | |--86.18%-- _kcapi_common_vmsplice_chunk_fd | | | | | |--83.68%-- splice | | | | | | | |--83.59%-- ret_from_syscall | | | | | | | | | |--83.52%-- sys_splice | | | | | | | | | | | |--83.49%-- splice_from_pipe | | | | | | | | | | | | | |--83.04%-- __splice_from_pipe | | | | | | | | | | | | | | | |--80.67%-- pipe_to_sendpage | | | | | | | | | | | | | | | | | |--78.25%-- hash_sendpage | | | | | | | | | | | | | | | | | | | |--60.08%-- ahash_process_req | | | | | | | | | | | | | | | | | | | | | |--56.36%-- sg_copy_buffer | | | | | | | | | | | | | | | | | | | | | | | |--55.29%-- memcpy | | | | | | | | | | | | However, unlike SEC2+, SEC1 offers the possibility to chain descriptors. It is therefore possible to build a first descriptor pointing to the buffered data and a second descriptor pointing to the incoming data, hence avoiding the memory copy to a single buffer. With this patch, the time necessary for a md5sum on a 90Mbytes file is approximately 3 seconds. Without the patch it takes 6 seconds. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2017-10-06 21:05:06 +08:00
if (priv->chan[ch].fifo[iter].desc->next_desc == cur_desc)
return (priv->chan[ch].fifo[iter].desc + 1)->hdr;
return priv->chan[ch].fifo[iter].desc->hdr;
}
/*
* user diagnostics; report root cause of error based on execution unit status
*/
static void report_eu_error(struct device *dev, int ch, u32 desc_hdr)
{
struct talitos_private *priv = dev_get_drvdata(dev);
int i;
if (!desc_hdr)
desc_hdr = in_be32(priv->chan[ch].reg + TALITOS_DESCBUF);
switch (desc_hdr & DESC_HDR_SEL0_MASK) {
case DESC_HDR_SEL0_AFEU:
dev_err(dev, "AFEUISR 0x%08x_%08x\n",
in_be32(priv->reg_afeu + TALITOS_EUISR),
in_be32(priv->reg_afeu + TALITOS_EUISR_LO));
break;
case DESC_HDR_SEL0_DEU:
dev_err(dev, "DEUISR 0x%08x_%08x\n",
in_be32(priv->reg_deu + TALITOS_EUISR),
in_be32(priv->reg_deu + TALITOS_EUISR_LO));
break;
case DESC_HDR_SEL0_MDEUA:
case DESC_HDR_SEL0_MDEUB:
dev_err(dev, "MDEUISR 0x%08x_%08x\n",
in_be32(priv->reg_mdeu + TALITOS_EUISR),
in_be32(priv->reg_mdeu + TALITOS_EUISR_LO));
break;
case DESC_HDR_SEL0_RNG:
dev_err(dev, "RNGUISR 0x%08x_%08x\n",
in_be32(priv->reg_rngu + TALITOS_ISR),
in_be32(priv->reg_rngu + TALITOS_ISR_LO));
break;
case DESC_HDR_SEL0_PKEU:
dev_err(dev, "PKEUISR 0x%08x_%08x\n",
in_be32(priv->reg_pkeu + TALITOS_EUISR),
in_be32(priv->reg_pkeu + TALITOS_EUISR_LO));
break;
case DESC_HDR_SEL0_AESU:
dev_err(dev, "AESUISR 0x%08x_%08x\n",
in_be32(priv->reg_aesu + TALITOS_EUISR),
in_be32(priv->reg_aesu + TALITOS_EUISR_LO));
break;
case DESC_HDR_SEL0_CRCU:
dev_err(dev, "CRCUISR 0x%08x_%08x\n",
in_be32(priv->reg_crcu + TALITOS_EUISR),
in_be32(priv->reg_crcu + TALITOS_EUISR_LO));
break;
case DESC_HDR_SEL0_KEU:
dev_err(dev, "KEUISR 0x%08x_%08x\n",
in_be32(priv->reg_pkeu + TALITOS_EUISR),
in_be32(priv->reg_pkeu + TALITOS_EUISR_LO));
break;
}
switch (desc_hdr & DESC_HDR_SEL1_MASK) {
case DESC_HDR_SEL1_MDEUA:
case DESC_HDR_SEL1_MDEUB:
dev_err(dev, "MDEUISR 0x%08x_%08x\n",
in_be32(priv->reg_mdeu + TALITOS_EUISR),
in_be32(priv->reg_mdeu + TALITOS_EUISR_LO));
break;
case DESC_HDR_SEL1_CRCU:
dev_err(dev, "CRCUISR 0x%08x_%08x\n",
in_be32(priv->reg_crcu + TALITOS_EUISR),
in_be32(priv->reg_crcu + TALITOS_EUISR_LO));
break;
}
for (i = 0; i < 8; i++)
dev_err(dev, "DESCBUF 0x%08x_%08x\n",
in_be32(priv->chan[ch].reg + TALITOS_DESCBUF + 8*i),
in_be32(priv->chan[ch].reg + TALITOS_DESCBUF_LO + 8*i));
}
/*
* recover from error interrupts
*/
static void talitos_error(struct device *dev, u32 isr, u32 isr_lo)
{
struct talitos_private *priv = dev_get_drvdata(dev);
unsigned int timeout = TALITOS_TIMEOUT;
int ch, error, reset_dev = 0;
u32 v_lo;
bool is_sec1 = has_ftr_sec1(priv);
int reset_ch = is_sec1 ? 1 : 0; /* only SEC2 supports continuation */
for (ch = 0; ch < priv->num_channels; ch++) {
/* skip channels without errors */
if (is_sec1) {
/* bits 29, 31, 17, 19 */
if (!(isr & (1 << (29 + (ch & 1) * 2 - (ch & 2) * 6))))
continue;
} else {
if (!(isr & (1 << (ch * 2 + 1))))
continue;
}
error = -EINVAL;
v_lo = in_be32(priv->chan[ch].reg + TALITOS_CCPSR_LO);
if (v_lo & TALITOS_CCPSR_LO_DOF) {
dev_err(dev, "double fetch fifo overflow error\n");
error = -EAGAIN;
reset_ch = 1;
}
if (v_lo & TALITOS_CCPSR_LO_SOF) {
/* h/w dropped descriptor */
dev_err(dev, "single fetch fifo overflow error\n");
error = -EAGAIN;
}
if (v_lo & TALITOS_CCPSR_LO_MDTE)
dev_err(dev, "master data transfer error\n");
if (v_lo & TALITOS_CCPSR_LO_SGDLZ)
dev_err(dev, is_sec1 ? "pointer not complete error\n"
: "s/g data length zero error\n");
if (v_lo & TALITOS_CCPSR_LO_FPZ)
dev_err(dev, is_sec1 ? "parity error\n"
: "fetch pointer zero error\n");
if (v_lo & TALITOS_CCPSR_LO_IDH)
dev_err(dev, "illegal descriptor header error\n");
if (v_lo & TALITOS_CCPSR_LO_IEU)
dev_err(dev, is_sec1 ? "static assignment error\n"
: "invalid exec unit error\n");
if (v_lo & TALITOS_CCPSR_LO_EU)
report_eu_error(dev, ch, current_desc_hdr(dev, ch));
if (!is_sec1) {
if (v_lo & TALITOS_CCPSR_LO_GB)
dev_err(dev, "gather boundary error\n");
if (v_lo & TALITOS_CCPSR_LO_GRL)
dev_err(dev, "gather return/length error\n");
if (v_lo & TALITOS_CCPSR_LO_SB)
dev_err(dev, "scatter boundary error\n");
if (v_lo & TALITOS_CCPSR_LO_SRL)
dev_err(dev, "scatter return/length error\n");
}
flush_channel(dev, ch, error, reset_ch);
if (reset_ch) {
reset_channel(dev, ch);
} else {
setbits32(priv->chan[ch].reg + TALITOS_CCCR,
TALITOS2_CCCR_CONT);
setbits32(priv->chan[ch].reg + TALITOS_CCCR_LO, 0);
while ((in_be32(priv->chan[ch].reg + TALITOS_CCCR) &
TALITOS2_CCCR_CONT) && --timeout)
cpu_relax();
if (timeout == 0) {
dev_err(dev, "failed to restart channel %d\n",
ch);
reset_dev = 1;
}
}
}
if (reset_dev || (is_sec1 && isr & ~TALITOS1_ISR_4CHERR) ||
(!is_sec1 && isr & ~TALITOS2_ISR_4CHERR) || isr_lo) {
if (is_sec1 && (isr_lo & TALITOS1_ISR_TEA_ERR))
dev_err(dev, "TEA error: ISR 0x%08x_%08x\n",
isr, isr_lo);
else
dev_err(dev, "done overflow, internal time out, or "
"rngu error: ISR 0x%08x_%08x\n", isr, isr_lo);
/* purge request queues */
for (ch = 0; ch < priv->num_channels; ch++)
flush_channel(dev, ch, -EIO, 1);
/* reset and reinitialize the device */
init_device(dev);
}
}
#define DEF_TALITOS1_INTERRUPT(name, ch_done_mask, ch_err_mask, tlet) \
static irqreturn_t talitos1_interrupt_##name(int irq, void *data) \
{ \
struct device *dev = data; \
struct talitos_private *priv = dev_get_drvdata(dev); \
u32 isr, isr_lo; \
unsigned long flags; \
\
spin_lock_irqsave(&priv->reg_lock, flags); \
isr = in_be32(priv->reg + TALITOS_ISR); \
isr_lo = in_be32(priv->reg + TALITOS_ISR_LO); \
/* Acknowledge interrupt */ \
out_be32(priv->reg + TALITOS_ICR, isr & (ch_done_mask | ch_err_mask)); \
out_be32(priv->reg + TALITOS_ICR_LO, isr_lo); \
\
if (unlikely(isr & ch_err_mask || isr_lo & TALITOS1_IMR_LO_INIT)) { \
spin_unlock_irqrestore(&priv->reg_lock, flags); \
talitos_error(dev, isr & ch_err_mask, isr_lo); \
} \
else { \
if (likely(isr & ch_done_mask)) { \
/* mask further done interrupts. */ \
setbits32(priv->reg + TALITOS_IMR, ch_done_mask); \
/* done_task will unmask done interrupts at exit */ \
tasklet_schedule(&priv->done_task[tlet]); \
} \
spin_unlock_irqrestore(&priv->reg_lock, flags); \
} \
\
return (isr & (ch_done_mask | ch_err_mask) || isr_lo) ? IRQ_HANDLED : \
IRQ_NONE; \
}
DEF_TALITOS1_INTERRUPT(4ch, TALITOS1_ISR_4CHDONE, TALITOS1_ISR_4CHERR, 0)
#define DEF_TALITOS2_INTERRUPT(name, ch_done_mask, ch_err_mask, tlet) \
static irqreturn_t talitos2_interrupt_##name(int irq, void *data) \
{ \
struct device *dev = data; \
struct talitos_private *priv = dev_get_drvdata(dev); \
u32 isr, isr_lo; \
unsigned long flags; \
\
spin_lock_irqsave(&priv->reg_lock, flags); \
isr = in_be32(priv->reg + TALITOS_ISR); \
isr_lo = in_be32(priv->reg + TALITOS_ISR_LO); \
/* Acknowledge interrupt */ \
out_be32(priv->reg + TALITOS_ICR, isr & (ch_done_mask | ch_err_mask)); \
out_be32(priv->reg + TALITOS_ICR_LO, isr_lo); \
\
if (unlikely(isr & ch_err_mask || isr_lo)) { \
spin_unlock_irqrestore(&priv->reg_lock, flags); \
talitos_error(dev, isr & ch_err_mask, isr_lo); \
} \
else { \
if (likely(isr & ch_done_mask)) { \
/* mask further done interrupts. */ \
clrbits32(priv->reg + TALITOS_IMR, ch_done_mask); \
/* done_task will unmask done interrupts at exit */ \
tasklet_schedule(&priv->done_task[tlet]); \
} \
spin_unlock_irqrestore(&priv->reg_lock, flags); \
} \
\
return (isr & (ch_done_mask | ch_err_mask) || isr_lo) ? IRQ_HANDLED : \
IRQ_NONE; \
}
DEF_TALITOS2_INTERRUPT(4ch, TALITOS2_ISR_4CHDONE, TALITOS2_ISR_4CHERR, 0)
DEF_TALITOS2_INTERRUPT(ch0_2, TALITOS2_ISR_CH_0_2_DONE, TALITOS2_ISR_CH_0_2_ERR,
0)
DEF_TALITOS2_INTERRUPT(ch1_3, TALITOS2_ISR_CH_1_3_DONE, TALITOS2_ISR_CH_1_3_ERR,
1)
/*
* hwrng
*/
static int talitos_rng_data_present(struct hwrng *rng, int wait)
{
struct device *dev = (struct device *)rng->priv;
struct talitos_private *priv = dev_get_drvdata(dev);
u32 ofl;
int i;
for (i = 0; i < 20; i++) {
ofl = in_be32(priv->reg_rngu + TALITOS_EUSR_LO) &
TALITOS_RNGUSR_LO_OFL;
if (ofl || !wait)
break;
udelay(10);
}
return !!ofl;
}
static int talitos_rng_data_read(struct hwrng *rng, u32 *data)
{
struct device *dev = (struct device *)rng->priv;
struct talitos_private *priv = dev_get_drvdata(dev);
/* rng fifo requires 64-bit accesses */
*data = in_be32(priv->reg_rngu + TALITOS_EU_FIFO);
*data = in_be32(priv->reg_rngu + TALITOS_EU_FIFO_LO);
return sizeof(u32);
}
static int talitos_rng_init(struct hwrng *rng)
{
struct device *dev = (struct device *)rng->priv;
struct talitos_private *priv = dev_get_drvdata(dev);
unsigned int timeout = TALITOS_TIMEOUT;
setbits32(priv->reg_rngu + TALITOS_EURCR_LO, TALITOS_RNGURCR_LO_SR);
while (!(in_be32(priv->reg_rngu + TALITOS_EUSR_LO)
& TALITOS_RNGUSR_LO_RD)
&& --timeout)
cpu_relax();
if (timeout == 0) {
dev_err(dev, "failed to reset rng hw\n");
return -ENODEV;
}
/* start generating */
setbits32(priv->reg_rngu + TALITOS_EUDSR_LO, 0);
return 0;
}
static int talitos_register_rng(struct device *dev)
{
struct talitos_private *priv = dev_get_drvdata(dev);
int err;
priv->rng.name = dev_driver_string(dev),
priv->rng.init = talitos_rng_init,
priv->rng.data_present = talitos_rng_data_present,
priv->rng.data_read = talitos_rng_data_read,
priv->rng.priv = (unsigned long)dev;
err = hwrng_register(&priv->rng);
if (!err)
priv->rng_registered = true;
return err;
}
static void talitos_unregister_rng(struct device *dev)
{
struct talitos_private *priv = dev_get_drvdata(dev);
if (!priv->rng_registered)
return;
hwrng_unregister(&priv->rng);
priv->rng_registered = false;
}
/*
* crypto alg
*/
#define TALITOS_CRA_PRIORITY 3000
/*
* Defines a priority for doing AEAD with descriptors type
* HMAC_SNOOP_NO_AFEA (HSNA) instead of type IPSEC_ESP
*/
#define TALITOS_CRA_PRIORITY_AEAD_HSNA (TALITOS_CRA_PRIORITY - 1)
#define TALITOS_MAX_KEY_SIZE (AES_MAX_KEY_SIZE + SHA512_BLOCK_SIZE)
#define TALITOS_MAX_IV_LENGTH 16 /* max of AES_BLOCK_SIZE, DES3_EDE_BLOCK_SIZE */
struct talitos_ctx {
struct device *dev;
int ch;
__be32 desc_hdr_template;
u8 key[TALITOS_MAX_KEY_SIZE];
u8 iv[TALITOS_MAX_IV_LENGTH];
dma_addr_t dma_key;
unsigned int keylen;
unsigned int enckeylen;
unsigned int authkeylen;
crypto: talitos - chain in buffered data for ahash on SEC1 SEC1 doesn't support S/G in descriptors so for hash operations, the CPU has to build a buffer containing the buffered block and the incoming data. This generates a lot of memory copies which represents more than 50% of CPU time of a md5sum operation as shown below with a 'perf record'. |--86.24%-- kcapi_md_digest | | | |--86.18%-- _kcapi_common_vmsplice_chunk_fd | | | | | |--83.68%-- splice | | | | | | | |--83.59%-- ret_from_syscall | | | | | | | | | |--83.52%-- sys_splice | | | | | | | | | | | |--83.49%-- splice_from_pipe | | | | | | | | | | | | | |--83.04%-- __splice_from_pipe | | | | | | | | | | | | | | | |--80.67%-- pipe_to_sendpage | | | | | | | | | | | | | | | | | |--78.25%-- hash_sendpage | | | | | | | | | | | | | | | | | | | |--60.08%-- ahash_process_req | | | | | | | | | | | | | | | | | | | | | |--56.36%-- sg_copy_buffer | | | | | | | | | | | | | | | | | | | | | | | |--55.29%-- memcpy | | | | | | | | | | | | However, unlike SEC2+, SEC1 offers the possibility to chain descriptors. It is therefore possible to build a first descriptor pointing to the buffered data and a second descriptor pointing to the incoming data, hence avoiding the memory copy to a single buffer. With this patch, the time necessary for a md5sum on a 90Mbytes file is approximately 3 seconds. Without the patch it takes 6 seconds. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2017-10-06 21:05:06 +08:00
dma_addr_t dma_buf;
dma_addr_t dma_hw_context;
};
#define HASH_MAX_BLOCK_SIZE SHA512_BLOCK_SIZE
#define TALITOS_MDEU_MAX_CONTEXT_SIZE TALITOS_MDEU_CONTEXT_SIZE_SHA384_SHA512
struct talitos_ahash_req_ctx {
u32 hw_context[TALITOS_MDEU_MAX_CONTEXT_SIZE / sizeof(u32)];
unsigned int hw_context_size;
u8 buf[2][HASH_MAX_BLOCK_SIZE];
int buf_idx;
unsigned int swinit;
unsigned int first;
unsigned int last;
unsigned int to_hash_later;
unsigned int nbuf;
struct scatterlist bufsl[2];
struct scatterlist *psrc;
};
struct talitos_export_state {
u32 hw_context[TALITOS_MDEU_MAX_CONTEXT_SIZE / sizeof(u32)];
u8 buf[HASH_MAX_BLOCK_SIZE];
unsigned int swinit;
unsigned int first;
unsigned int last;
unsigned int to_hash_later;
unsigned int nbuf;
};
static int aead_setkey(struct crypto_aead *authenc,
const u8 *key, unsigned int keylen)
{
struct talitos_ctx *ctx = crypto_aead_ctx(authenc);
struct device *dev = ctx->dev;
struct crypto_authenc_keys keys;
if (crypto_authenc_extractkeys(&keys, key, keylen) != 0)
goto badkey;
if (keys.authkeylen + keys.enckeylen > TALITOS_MAX_KEY_SIZE)
goto badkey;
if (ctx->keylen)
dma_unmap_single(dev, ctx->dma_key, ctx->keylen, DMA_TO_DEVICE);
memcpy(ctx->key, keys.authkey, keys.authkeylen);
memcpy(&ctx->key[keys.authkeylen], keys.enckey, keys.enckeylen);
ctx->keylen = keys.authkeylen + keys.enckeylen;
ctx->enckeylen = keys.enckeylen;
ctx->authkeylen = keys.authkeylen;
ctx->dma_key = dma_map_single(dev, ctx->key, ctx->keylen,
DMA_TO_DEVICE);
return 0;
badkey:
crypto_aead_set_flags(authenc, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
/*
* talitos_edesc - s/w-extended descriptor
* @src_nents: number of segments in input scatterlist
* @dst_nents: number of segments in output scatterlist
* @icv_ool: whether ICV is out-of-line
* @iv_dma: dma address of iv for checking continuity and link table
* @dma_len: length of dma mapped link_tbl space
* @dma_link_tbl: bus physical address of link_tbl/buf
* @desc: h/w descriptor
* @link_tbl: input and output h/w link tables (if {src,dst}_nents > 1) (SEC2)
* @buf: input and output buffeur (if {src,dst}_nents > 1) (SEC1)
*
* if decrypting (with authcheck), or either one of src_nents or dst_nents
* is greater than 1, an integrity check value is concatenated to the end
* of link_tbl data
*/
struct talitos_edesc {
int src_nents;
int dst_nents;
bool icv_ool;
dma_addr_t iv_dma;
int dma_len;
dma_addr_t dma_link_tbl;
struct talitos_desc desc;
union {
struct talitos_ptr link_tbl[0];
u8 buf[0];
};
};
static void talitos_sg_unmap(struct device *dev,
struct talitos_edesc *edesc,
struct scatterlist *src,
struct scatterlist *dst,
unsigned int len, unsigned int offset)
{
struct talitos_private *priv = dev_get_drvdata(dev);
bool is_sec1 = has_ftr_sec1(priv);
unsigned int src_nents = edesc->src_nents ? : 1;
unsigned int dst_nents = edesc->dst_nents ? : 1;
if (is_sec1 && dst && dst_nents > 1) {
dma_sync_single_for_device(dev, edesc->dma_link_tbl + offset,
len, DMA_FROM_DEVICE);
sg_pcopy_from_buffer(dst, dst_nents, edesc->buf + offset, len,
offset);
}
if (src != dst) {
if (src_nents == 1 || !is_sec1)
dma_unmap_sg(dev, src, src_nents, DMA_TO_DEVICE);
if (dst && (dst_nents == 1 || !is_sec1))
dma_unmap_sg(dev, dst, dst_nents, DMA_FROM_DEVICE);
} else if (src_nents == 1 || !is_sec1) {
dma_unmap_sg(dev, src, src_nents, DMA_BIDIRECTIONAL);
}
}
static void ipsec_esp_unmap(struct device *dev,
struct talitos_edesc *edesc,
struct aead_request *areq)
{
struct crypto_aead *aead = crypto_aead_reqtfm(areq);
struct talitos_ctx *ctx = crypto_aead_ctx(aead);
unsigned int ivsize = crypto_aead_ivsize(aead);
bool is_ipsec_esp = edesc->desc.hdr & DESC_HDR_TYPE_IPSEC_ESP;
struct talitos_ptr *civ_ptr = &edesc->desc.ptr[is_ipsec_esp ? 2 : 3];
if (is_ipsec_esp)
unmap_single_talitos_ptr(dev, &edesc->desc.ptr[6],
DMA_FROM_DEVICE);
unmap_single_talitos_ptr(dev, civ_ptr, DMA_TO_DEVICE);
talitos_sg_unmap(dev, edesc, areq->src, areq->dst, areq->cryptlen,
areq->assoclen);
if (edesc->dma_len)
dma_unmap_single(dev, edesc->dma_link_tbl, edesc->dma_len,
DMA_BIDIRECTIONAL);
if (!is_ipsec_esp) {
unsigned int dst_nents = edesc->dst_nents ? : 1;
sg_pcopy_to_buffer(areq->dst, dst_nents, ctx->iv, ivsize,
areq->assoclen + areq->cryptlen - ivsize);
}
}
/*
* ipsec_esp descriptor callbacks
*/
static void ipsec_esp_encrypt_done(struct device *dev,
struct talitos_desc *desc, void *context,
int err)
{
struct talitos_private *priv = dev_get_drvdata(dev);
bool is_sec1 = has_ftr_sec1(priv);
struct aead_request *areq = context;
struct crypto_aead *authenc = crypto_aead_reqtfm(areq);
unsigned int authsize = crypto_aead_authsize(authenc);
unsigned int ivsize = crypto_aead_ivsize(authenc);
struct talitos_edesc *edesc;
struct scatterlist *sg;
void *icvdata;
edesc = container_of(desc, struct talitos_edesc, desc);
ipsec_esp_unmap(dev, edesc, areq);
/* copy the generated ICV to dst */
if (edesc->icv_ool) {
if (is_sec1)
icvdata = edesc->buf + areq->assoclen + areq->cryptlen;
else
icvdata = &edesc->link_tbl[edesc->src_nents +
edesc->dst_nents + 2];
sg = sg_last(areq->dst, edesc->dst_nents);
memcpy((char *)sg_virt(sg) + sg->length - authsize,
icvdata, authsize);
}
dma_unmap_single(dev, edesc->iv_dma, ivsize, DMA_TO_DEVICE);
kfree(edesc);
aead_request_complete(areq, err);
}
static void ipsec_esp_decrypt_swauth_done(struct device *dev,
struct talitos_desc *desc,
void *context, int err)
{
struct aead_request *req = context;
struct crypto_aead *authenc = crypto_aead_reqtfm(req);
unsigned int authsize = crypto_aead_authsize(authenc);
struct talitos_edesc *edesc;
struct scatterlist *sg;
char *oicv, *icv;
struct talitos_private *priv = dev_get_drvdata(dev);
bool is_sec1 = has_ftr_sec1(priv);
edesc = container_of(desc, struct talitos_edesc, desc);
ipsec_esp_unmap(dev, edesc, req);
if (!err) {
/* auth check */
sg = sg_last(req->dst, edesc->dst_nents ? : 1);
icv = (char *)sg_virt(sg) + sg->length - authsize;
if (edesc->dma_len) {
if (is_sec1)
oicv = (char *)&edesc->dma_link_tbl +
req->assoclen + req->cryptlen;
else
oicv = (char *)
&edesc->link_tbl[edesc->src_nents +
edesc->dst_nents + 2];
if (edesc->icv_ool)
icv = oicv + authsize;
} else
oicv = (char *)&edesc->link_tbl[0];
err = crypto_memneq(oicv, icv, authsize) ? -EBADMSG : 0;
}
kfree(edesc);
aead_request_complete(req, err);
}
static void ipsec_esp_decrypt_hwauth_done(struct device *dev,
struct talitos_desc *desc,
void *context, int err)
{
struct aead_request *req = context;
struct talitos_edesc *edesc;
edesc = container_of(desc, struct talitos_edesc, desc);
ipsec_esp_unmap(dev, edesc, req);
/* check ICV auth status */
if (!err && ((desc->hdr_lo & DESC_HDR_LO_ICCR1_MASK) !=
DESC_HDR_LO_ICCR1_PASS))
err = -EBADMSG;
kfree(edesc);
aead_request_complete(req, err);
}
/*
* convert scatterlist to SEC h/w link table format
* stop at cryptlen bytes
*/
static int sg_to_link_tbl_offset(struct scatterlist *sg, int sg_count,
unsigned int offset, int cryptlen,
struct talitos_ptr *link_tbl_ptr)
{
int n_sg = sg_count;
int count = 0;
while (cryptlen && sg && n_sg--) {
unsigned int len = sg_dma_len(sg);
if (offset >= len) {
offset -= len;
goto next;
}
len -= offset;
if (len > cryptlen)
len = cryptlen;
to_talitos_ptr(link_tbl_ptr + count,
sg_dma_address(sg) + offset, len, 0);
to_talitos_ptr_ext_set(link_tbl_ptr + count, 0, 0);
count++;
cryptlen -= len;
offset = 0;
next:
sg = sg_next(sg);
}
/* tag end of link table */
if (count > 0)
to_talitos_ptr_ext_set(link_tbl_ptr + count - 1,
DESC_PTR_LNKTBL_RETURN, 0);
return count;
}
static int talitos_sg_map(struct device *dev, struct scatterlist *src,
unsigned int len, struct talitos_edesc *edesc,
struct talitos_ptr *ptr,
int sg_count, unsigned int offset, int tbl_off)
{
struct talitos_private *priv = dev_get_drvdata(dev);
bool is_sec1 = has_ftr_sec1(priv);
crypto: talitos - fix Kernel Oops on hashing an empty file Performing the hash of an empty file leads to a kernel Oops [ 44.504600] Unable to handle kernel paging request for data at address 0x0000000c [ 44.512819] Faulting instruction address: 0xc02d2be8 [ 44.524088] Oops: Kernel access of bad area, sig: 11 [#1] [ 44.529171] BE PREEMPT CMPC885 [ 44.532232] CPU: 0 PID: 491 Comm: md5sum Not tainted 4.15.0-rc8-00211-g3a968610b6ea #81 [ 44.540814] NIP: c02d2be8 LR: c02d2984 CTR: 00000000 [ 44.545812] REGS: c6813c90 TRAP: 0300 Not tainted (4.15.0-rc8-00211-g3a968610b6ea) [ 44.554223] MSR: 00009032 <EE,ME,IR,DR,RI> CR: 48222822 XER: 20000000 [ 44.560855] DAR: 0000000c DSISR: c0000000 [ 44.560855] GPR00: c02d28fc c6813d40 c6828000 c646fa40 00000001 00000001 00000001 00000000 [ 44.560855] GPR08: 0000004c 00000000 c000bfcc 00000000 28222822 100280d4 00000000 10020008 [ 44.560855] GPR16: 00000000 00000020 00000000 00000000 10024008 00000000 c646f9f0 c6179a10 [ 44.560855] GPR24: 00000000 00000001 c62f0018 c6179a10 00000000 c6367a30 c62f0000 c646f9c0 [ 44.598542] NIP [c02d2be8] ahash_process_req+0x448/0x700 [ 44.603751] LR [c02d2984] ahash_process_req+0x1e4/0x700 [ 44.608868] Call Trace: [ 44.611329] [c6813d40] [c02d28fc] ahash_process_req+0x15c/0x700 (unreliable) [ 44.618302] [c6813d90] [c02060c4] hash_recvmsg+0x11c/0x210 [ 44.623716] [c6813db0] [c0331354] ___sys_recvmsg+0x98/0x138 [ 44.629226] [c6813eb0] [c03332c0] __sys_recvmsg+0x40/0x84 [ 44.634562] [c6813f10] [c03336c0] SyS_socketcall+0xb8/0x1d4 [ 44.640073] [c6813f40] [c000d1ac] ret_from_syscall+0x0/0x38 [ 44.645530] Instruction dump: [ 44.648465] 38c00001 7f63db78 4e800421 7c791b78 54690ffe 0f090000 80ff0190 2f870000 [ 44.656122] 40befe50 2f990001 409e0210 813f01bc <8129000c> b39e003a 7d29c214 913e003c This patch fixes that Oops by checking if src is NULL. Fixes: 6a1e8d14156d4 ("crypto: talitos - making mapping helpers more generic") Cc: <stable@vger.kernel.org> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2018-01-27 00:09:59 +08:00
if (!src) {
to_talitos_ptr(ptr, 0, 0, is_sec1);
return 1;
}
if (sg_count == 1) {
to_talitos_ptr(ptr, sg_dma_address(src) + offset, len, is_sec1);
return sg_count;
}
if (is_sec1) {
to_talitos_ptr(ptr, edesc->dma_link_tbl + offset, len, is_sec1);
return sg_count;
}
sg_count = sg_to_link_tbl_offset(src, sg_count, offset, len,
&edesc->link_tbl[tbl_off]);
if (sg_count == 1) {
/* Only one segment now, so no link tbl needed*/
copy_talitos_ptr(ptr, &edesc->link_tbl[tbl_off], is_sec1);
return sg_count;
}
to_talitos_ptr(ptr, edesc->dma_link_tbl +
tbl_off * sizeof(struct talitos_ptr), len, is_sec1);
to_talitos_ptr_ext_or(ptr, DESC_PTR_LNKTBL_JUMP, is_sec1);
return sg_count;
}
/*
* fill in and submit ipsec_esp descriptor
*/
static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
void (*callback)(struct device *dev,
struct talitos_desc *desc,
void *context, int error))
{
struct crypto_aead *aead = crypto_aead_reqtfm(areq);
unsigned int authsize = crypto_aead_authsize(aead);
struct talitos_ctx *ctx = crypto_aead_ctx(aead);
struct device *dev = ctx->dev;
struct talitos_desc *desc = &edesc->desc;
unsigned int cryptlen = areq->cryptlen;
unsigned int ivsize = crypto_aead_ivsize(aead);
int tbl_off = 0;
int sg_count, ret;
int sg_link_tbl_len;
bool sync_needed = false;
struct talitos_private *priv = dev_get_drvdata(dev);
bool is_sec1 = has_ftr_sec1(priv);
bool is_ipsec_esp = desc->hdr & DESC_HDR_TYPE_IPSEC_ESP;
struct talitos_ptr *civ_ptr = &desc->ptr[is_ipsec_esp ? 2 : 3];
struct talitos_ptr *ckey_ptr = &desc->ptr[is_ipsec_esp ? 3 : 2];
/* hmac key */
to_talitos_ptr(&desc->ptr[0], ctx->dma_key, ctx->authkeylen, is_sec1);
sg_count = edesc->src_nents ?: 1;
if (is_sec1 && sg_count > 1)
sg_copy_to_buffer(areq->src, sg_count, edesc->buf,
areq->assoclen + cryptlen);
else
sg_count = dma_map_sg(dev, areq->src, sg_count,
(areq->src == areq->dst) ?
DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
/* hmac data */
ret = talitos_sg_map(dev, areq->src, areq->assoclen, edesc,
&desc->ptr[1], sg_count, 0, tbl_off);
if (ret > 1) {
tbl_off += ret;
sync_needed = true;
}
/* cipher iv */
to_talitos_ptr(civ_ptr, edesc->iv_dma, ivsize, is_sec1);
/* cipher key */
to_talitos_ptr(ckey_ptr, ctx->dma_key + ctx->authkeylen,
ctx->enckeylen, is_sec1);
/*
* cipher in
* map and adjust cipher len to aead request cryptlen.
* extent is bytes of HMAC postpended to ciphertext,
* typically 12 for ipsec
*/
sg_link_tbl_len = cryptlen;
if (is_ipsec_esp) {
to_talitos_ptr_ext_set(&desc->ptr[4], authsize, is_sec1);
if (desc->hdr & DESC_HDR_MODE1_MDEU_CICV)
sg_link_tbl_len += authsize;
}
ret = talitos_sg_map(dev, areq->src, sg_link_tbl_len, edesc,
&desc->ptr[4], sg_count, areq->assoclen, tbl_off);
crypto: talitos - fix AEAD test failures AEAD tests fail when destination SG list has more than 1 element. [ 2.058752] alg: aead: Test 1 failed on encryption for authenc-hmac-sha1-cbc-aes-talitos [ 2.066965] 00000000: 53 69 6e 67 6c 65 20 62 6c 6f 63 6b 20 6d 73 67 00000010: c0 43 ff 74 c0 43 ff e0 de 83 d1 20 de 84 8e 54 00000020: de 83 d7 c4 [ 2.082138] alg: aead: Test 1 failed on encryption for authenc-hmac-sha1-cbc-aes-talitos [ 2.090435] 00000000: 53 69 6e 67 6c 65 20 62 6c 6f 63 6b 20 6d 73 67 00000010: de 84 ea 58 c0 93 1a 24 de 84 e8 59 de 84 f1 20 00000020: 00 00 00 00 [ 2.105721] alg: aead: Test 1 failed on encryption for authenc-hmac-sha1-cbc-3des-talitos [ 2.114259] 00000000: 6f 54 20 6f 61 4d 79 6e 53 20 63 65 65 72 73 74 00000010: 54 20 6f 6f 4d 20 6e 61 20 79 65 53 72 63 74 65 00000020: 20 73 6f 54 20 6f 61 4d 79 6e 53 20 63 65 65 72 00000030: 73 74 54 20 6f 6f 4d 20 6e 61 20 79 65 53 72 63 00000040: 74 65 20 73 6f 54 20 6f 61 4d 79 6e 53 20 63 65 00000050: 65 72 73 74 54 20 6f 6f 4d 20 6e 61 20 79 65 53 00000060: 72 63 74 65 20 73 6f 54 20 6f 61 4d 79 6e 53 20 00000070: 63 65 65 72 73 74 54 20 6f 6f 4d 20 6e 61 0a 79 00000080: c0 50 f1 ac c0 50 f3 38 c0 50 f3 94 c0 50 f5 30 00000090: c0 99 74 3c [ 2.166410] alg: aead: Test 1 failed on encryption for authenc-hmac-sha1-cbc-3des-talitos [ 2.174794] 00000000: 6f 54 20 6f 61 4d 79 6e 53 20 63 65 65 72 73 74 00000010: 54 20 6f 6f 4d 20 6e 61 20 79 65 53 72 63 74 65 00000020: 20 73 6f 54 20 6f 61 4d 79 6e 53 20 63 65 65 72 00000030: 73 74 54 20 6f 6f 4d 20 6e 61 20 79 65 53 72 63 00000040: 74 65 20 73 6f 54 20 6f 61 4d 79 6e 53 20 63 65 00000050: 65 72 73 74 54 20 6f 6f 4d 20 6e 61 20 79 65 53 00000060: 72 63 74 65 20 73 6f 54 20 6f 61 4d 79 6e 53 20 00000070: 63 65 65 72 73 74 54 20 6f 6f 4d 20 6e 61 0a 79 00000080: c0 50 f1 ac c0 50 f3 38 c0 50 f3 94 c0 50 f5 30 00000090: c0 99 74 3c [ 2.226486] alg: No test for authenc(hmac(sha224),cbc(aes)) (authenc-hmac-sha224-cbc-aes-talitos) [ 2.236459] alg: No test for authenc(hmac(sha224),cbc(aes)) (authenc-hmac-sha224-cbc-aes-talitos) [ 2.247196] alg: aead: Test 1 failed on encryption for authenc-hmac-sha224-cbc-3des-talitos [ 2.255555] 00000000: 6f 54 20 6f 61 4d 79 6e 53 20 63 65 65 72 73 74 00000010: 54 20 6f 6f 4d 20 6e 61 20 79 65 53 72 63 74 65 00000020: 20 73 6f 54 20 6f 61 4d 79 6e 53 20 63 65 65 72 00000030: 73 74 54 20 6f 6f 4d 20 6e 61 20 79 65 53 72 63 00000040: 74 65 20 73 6f 54 20 6f 61 4d 79 6e 53 20 63 65 00000050: 65 72 73 74 54 20 6f 6f 4d 20 6e 61 20 79 65 53 00000060: 72 63 74 65 20 73 6f 54 20 6f 61 4d 79 6e 53 20 00000070: 63 65 65 72 73 74 54 20 6f 6f 4d 20 6e 61 0a 79 00000080: c0 50 f1 ac c0 50 f3 38 c0 50 f3 94 c0 50 f5 30 00000090: c0 99 74 3c c0 96 e5 b8 [ 2.309004] alg: aead: Test 1 failed on encryption for authenc-hmac-sha224-cbc-3des-talitos [ 2.317562] 00000000: 6f 54 20 6f 61 4d 79 6e 53 20 63 65 65 72 73 74 00000010: 54 20 6f 6f 4d 20 6e 61 20 79 65 53 72 63 74 65 00000020: 20 73 6f 54 20 6f 61 4d 79 6e 53 20 63 65 65 72 00000030: 73 74 54 20 6f 6f 4d 20 6e 61 20 79 65 53 72 63 00000040: 74 65 20 73 6f 54 20 6f 61 4d 79 6e 53 20 63 65 00000050: 65 72 73 74 54 20 6f 6f 4d 20 6e 61 20 79 65 53 00000060: 72 63 74 65 20 73 6f 54 20 6f 61 4d 79 6e 53 20 00000070: 63 65 65 72 73 74 54 20 6f 6f 4d 20 6e 61 0a 79 00000080: c0 50 f1 ac c0 50 f3 38 c0 50 f3 94 c0 50 f5 30 00000090: c0 99 74 3c c0 96 e5 b8 [ 2.370710] alg: aead: Test 1 failed on encryption for authenc-hmac-sha256-cbc-aes-talitos [ 2.379177] 00000000: 53 69 6e 67 6c 65 20 62 6c 6f 63 6b 20 6d 73 67 00000010: 54 20 6f 6f 4d 20 6e 61 20 79 65 53 72 63 74 65 00000020: 20 73 6f 54 20 6f 61 4d 79 6e 53 20 63 65 65 72 [ 2.397863] alg: aead: Test 1 failed on encryption for authenc-hmac-sha256-cbc-aes-talitos [ 2.406134] 00000000: 53 69 6e 67 6c 65 20 62 6c 6f 63 6b 20 6d 73 67 00000010: 54 20 6f 6f 4d 20 6e 61 20 79 65 53 72 63 74 65 00000020: 20 73 6f 54 20 6f 61 4d 79 6e 53 20 63 65 65 72 [ 2.424789] alg: aead: Test 1 failed on encryption for authenc-hmac-sha256-cbc-3des-talitos [ 2.433491] 00000000: 6f 54 20 6f 61 4d 79 6e 53 20 63 65 65 72 73 74 00000010: 54 20 6f 6f 4d 20 6e 61 20 79 65 53 72 63 74 65 00000020: 20 73 6f 54 20 6f 61 4d 79 6e 53 20 63 65 65 72 00000030: 73 74 54 20 6f 6f 4d 20 6e 61 20 79 65 53 72 63 00000040: 74 65 20 73 6f 54 20 6f 61 4d 79 6e 53 20 63 65 00000050: 65 72 73 74 54 20 6f 6f 4d 20 6e 61 20 79 65 53 00000060: 72 63 74 65 20 73 6f 54 20 6f 61 4d 79 6e 53 20 00000070: 63 65 65 72 73 74 54 20 6f 6f 4d 20 6e 61 0a 79 00000080: c0 50 f1 ac c0 50 f3 38 c0 50 f3 94 c0 50 f5 30 00000090: c0 99 74 3c c0 96 e5 b8 c0 96 e9 20 c0 00 3d dc [ 2.488832] alg: aead: Test 1 failed on encryption for authenc-hmac-sha256-cbc-3des-talitos [ 2.497387] 00000000: 6f 54 20 6f 61 4d 79 6e 53 20 63 65 65 72 73 74 00000010: 54 20 6f 6f 4d 20 6e 61 20 79 65 53 72 63 74 65 00000020: 20 73 6f 54 20 6f 61 4d 79 6e 53 20 63 65 65 72 00000030: 73 74 54 20 6f 6f 4d 20 6e 61 20 79 65 53 72 63 00000040: 74 65 20 73 6f 54 20 6f 61 4d 79 6e 53 20 63 65 00000050: 65 72 73 74 54 20 6f 6f 4d 20 6e 61 20 79 65 53 00000060: 72 63 74 65 20 73 6f 54 20 6f 61 4d 79 6e 53 20 00000070: 63 65 65 72 73 74 54 20 6f 6f 4d 20 6e 61 0a 79 00000080: c0 50 f1 ac c0 50 f3 38 c0 50 f3 94 c0 50 f5 30 00000090: c0 99 74 3c c0 96 e5 b8 c0 96 e9 20 c0 00 3d dc This patch fixes that. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2017-10-06 21:04:33 +08:00
if (ret > 1) {
tbl_off += ret;
sync_needed = true;
}
/* cipher out */
if (areq->src != areq->dst) {
sg_count = edesc->dst_nents ? : 1;
if (!is_sec1 || sg_count == 1)
dma_map_sg(dev, areq->dst, sg_count, DMA_FROM_DEVICE);
}
crypto: talitos - fix memory corruption on SEC2 On SEC2, when using the old descriptors type (hmac snoop no afeu) for doing IPsec, the CICV out pointeur points out of the allocated memory. [ 2.502554] ============================================================================= [ 2.510740] BUG dma-kmalloc-256 (Not tainted): Redzone overwritten [ 2.516907] ----------------------------------------------------------------------------- [ 2.516907] [ 2.526535] Disabling lock debugging due to kernel taint [ 2.531845] INFO: 0xde858108-0xde85810b. First byte 0xf8 instead of 0xcc [ 2.538549] INFO: Allocated in 0x806181a9 age=0 cpu=0 pid=58 [ 2.544229] __kmalloc+0x374/0x564 [ 2.547649] talitos_edesc_alloc+0x17c/0x48c [ 2.551929] aead_edesc_alloc+0x80/0x154 [ 2.555863] aead_encrypt+0x30/0xe0 [ 2.559368] __test_aead+0x5a0/0x1f3c [ 2.563042] test_aead+0x2c/0x110 [ 2.566371] alg_test_aead+0x5c/0xf4 [ 2.569958] alg_test+0x1dc/0x5a0 [ 2.573305] cryptomgr_test+0x50/0x70 [ 2.576984] kthread+0xd8/0x134 [ 2.580155] ret_from_kernel_thread+0x5c/0x64 [ 2.584534] INFO: Freed in ipsec_esp_encrypt_done+0x130/0x240 age=6 cpu=0 pid=0 [ 2.591839] ipsec_esp_encrypt_done+0x130/0x240 [ 2.596395] flush_channel+0x1dc/0x488 [ 2.600161] talitos2_done_4ch+0x30/0x200 [ 2.604185] tasklet_action+0xa0/0x13c [ 2.607948] __do_softirq+0x148/0x6cc [ 2.611623] irq_exit+0xc0/0x124 [ 2.614869] call_do_irq+0x24/0x3c [ 2.618292] do_IRQ+0x78/0x108 [ 2.621369] ret_from_except+0x0/0x14 [ 2.625055] finish_task_switch+0x58/0x350 [ 2.629165] schedule+0x80/0x134 [ 2.632409] schedule_preempt_disabled+0x38/0xc8 [ 2.637042] cpu_startup_entry+0xe4/0x190 [ 2.641074] start_kernel+0x3f4/0x408 [ 2.644741] 0x3438 [ 2.646857] INFO: Slab 0xdffbdb00 objects=9 used=1 fp=0xde8581c0 flags=0x0080 [ 2.653978] INFO: Object 0xde858008 @offset=8 fp=0xca4395df [ 2.653978] [ 2.661032] Redzone de858000: cc cc cc cc cc cc cc cc ........ [ 2.669029] Object de858008: 00 00 00 02 00 00 00 02 00 6b 6b 6b 1e 83 ea 28 .........kkk...( [ 2.677628] Object de858018: 00 00 00 70 1e 85 80 64 ff 73 1d 21 6b 6b 6b 6b ...p...d.s.!kkkk [ 2.686228] Object de858028: 00 20 00 00 1e 84 17 24 00 10 00 00 1e 85 70 00 . .....$......p. [ 2.694829] Object de858038: 00 18 00 00 1e 84 17 44 00 08 00 00 1e 83 ea 28 .......D.......( [ 2.703430] Object de858048: 00 80 00 00 1e 84 f0 00 00 80 00 00 1e 85 70 10 ..............p. [ 2.712030] Object de858058: 00 20 6b 00 1e 85 80 f4 6b 6b 6b 6b 00 80 02 00 . k.....kkkk.... [ 2.720629] Object de858068: 1e 84 f0 00 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b ....kkkkkkkkkkkk [ 2.729230] Object de858078: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.737830] Object de858088: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.746429] Object de858098: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.755029] Object de8580a8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.763628] Object de8580b8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.772229] Object de8580c8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.780829] Object de8580d8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.789430] Object de8580e8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 73 b0 ea 9f kkkkkkkkkkkks... [ 2.798030] Object de8580f8: e8 18 80 d6 56 38 44 c0 db e3 4f 71 f7 ce d1 d3 ....V8D...Oq.... [ 2.806629] Redzone de858108: f8 bd 3e 4f ..>O [ 2.814279] Padding de8581b0: 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ [ 2.822283] CPU: 0 PID: 0 Comm: swapper Tainted: G B 4.9.50-g995be12679 #179 [ 2.831819] Call Trace: [ 2.834301] [dffefd20] [c01aa9a8] check_bytes_and_report+0x100/0x194 (unreliable) [ 2.841801] [dffefd50] [c01aac3c] check_object+0x200/0x530 [ 2.847306] [dffefd80] [c01ae584] free_debug_processing+0x290/0x690 [ 2.853585] [dffefde0] [c01aec8c] __slab_free+0x308/0x628 [ 2.859000] [dffefe80] [c05057f4] ipsec_esp_encrypt_done+0x130/0x240 [ 2.865378] [dffefeb0] [c05002c4] flush_channel+0x1dc/0x488 [ 2.870968] [dffeff10] [c05007a8] talitos2_done_4ch+0x30/0x200 [ 2.876814] [dffeff30] [c002fe38] tasklet_action+0xa0/0x13c [ 2.882399] [dffeff60] [c002f118] __do_softirq+0x148/0x6cc [ 2.887896] [dffeffd0] [c002f954] irq_exit+0xc0/0x124 [ 2.892968] [dffefff0] [c0013adc] call_do_irq+0x24/0x3c [ 2.898213] [c0d4be00] [c000757c] do_IRQ+0x78/0x108 [ 2.903113] [c0d4be30] [c0015c08] ret_from_except+0x0/0x14 [ 2.908634] --- interrupt: 501 at finish_task_switch+0x70/0x350 [ 2.908634] LR = finish_task_switch+0x58/0x350 [ 2.919327] [c0d4bf20] [c085e1d4] schedule+0x80/0x134 [ 2.924398] [c0d4bf50] [c085e2c0] schedule_preempt_disabled+0x38/0xc8 [ 2.930853] [c0d4bf60] [c007f064] cpu_startup_entry+0xe4/0x190 [ 2.936707] [c0d4bfb0] [c096c434] start_kernel+0x3f4/0x408 [ 2.942198] [c0d4bff0] [00003438] 0x3438 [ 2.946137] FIX dma-kmalloc-256: Restoring 0xde858108-0xde85810b=0xcc [ 2.946137] [ 2.954158] FIX dma-kmalloc-256: Object at 0xde858008 not freed This patch reworks the handling of the CICV out in order to properly handle all cases. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2017-10-06 21:04:35 +08:00
ret = talitos_sg_map(dev, areq->dst, cryptlen, edesc, &desc->ptr[5],
sg_count, areq->assoclen, tbl_off);
if (is_ipsec_esp)
to_talitos_ptr_ext_or(&desc->ptr[5], authsize, is_sec1);
crypto: talitos - fix memory corruption on SEC2 On SEC2, when using the old descriptors type (hmac snoop no afeu) for doing IPsec, the CICV out pointeur points out of the allocated memory. [ 2.502554] ============================================================================= [ 2.510740] BUG dma-kmalloc-256 (Not tainted): Redzone overwritten [ 2.516907] ----------------------------------------------------------------------------- [ 2.516907] [ 2.526535] Disabling lock debugging due to kernel taint [ 2.531845] INFO: 0xde858108-0xde85810b. First byte 0xf8 instead of 0xcc [ 2.538549] INFO: Allocated in 0x806181a9 age=0 cpu=0 pid=58 [ 2.544229] __kmalloc+0x374/0x564 [ 2.547649] talitos_edesc_alloc+0x17c/0x48c [ 2.551929] aead_edesc_alloc+0x80/0x154 [ 2.555863] aead_encrypt+0x30/0xe0 [ 2.559368] __test_aead+0x5a0/0x1f3c [ 2.563042] test_aead+0x2c/0x110 [ 2.566371] alg_test_aead+0x5c/0xf4 [ 2.569958] alg_test+0x1dc/0x5a0 [ 2.573305] cryptomgr_test+0x50/0x70 [ 2.576984] kthread+0xd8/0x134 [ 2.580155] ret_from_kernel_thread+0x5c/0x64 [ 2.584534] INFO: Freed in ipsec_esp_encrypt_done+0x130/0x240 age=6 cpu=0 pid=0 [ 2.591839] ipsec_esp_encrypt_done+0x130/0x240 [ 2.596395] flush_channel+0x1dc/0x488 [ 2.600161] talitos2_done_4ch+0x30/0x200 [ 2.604185] tasklet_action+0xa0/0x13c [ 2.607948] __do_softirq+0x148/0x6cc [ 2.611623] irq_exit+0xc0/0x124 [ 2.614869] call_do_irq+0x24/0x3c [ 2.618292] do_IRQ+0x78/0x108 [ 2.621369] ret_from_except+0x0/0x14 [ 2.625055] finish_task_switch+0x58/0x350 [ 2.629165] schedule+0x80/0x134 [ 2.632409] schedule_preempt_disabled+0x38/0xc8 [ 2.637042] cpu_startup_entry+0xe4/0x190 [ 2.641074] start_kernel+0x3f4/0x408 [ 2.644741] 0x3438 [ 2.646857] INFO: Slab 0xdffbdb00 objects=9 used=1 fp=0xde8581c0 flags=0x0080 [ 2.653978] INFO: Object 0xde858008 @offset=8 fp=0xca4395df [ 2.653978] [ 2.661032] Redzone de858000: cc cc cc cc cc cc cc cc ........ [ 2.669029] Object de858008: 00 00 00 02 00 00 00 02 00 6b 6b 6b 1e 83 ea 28 .........kkk...( [ 2.677628] Object de858018: 00 00 00 70 1e 85 80 64 ff 73 1d 21 6b 6b 6b 6b ...p...d.s.!kkkk [ 2.686228] Object de858028: 00 20 00 00 1e 84 17 24 00 10 00 00 1e 85 70 00 . .....$......p. [ 2.694829] Object de858038: 00 18 00 00 1e 84 17 44 00 08 00 00 1e 83 ea 28 .......D.......( [ 2.703430] Object de858048: 00 80 00 00 1e 84 f0 00 00 80 00 00 1e 85 70 10 ..............p. [ 2.712030] Object de858058: 00 20 6b 00 1e 85 80 f4 6b 6b 6b 6b 00 80 02 00 . k.....kkkk.... [ 2.720629] Object de858068: 1e 84 f0 00 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b ....kkkkkkkkkkkk [ 2.729230] Object de858078: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.737830] Object de858088: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.746429] Object de858098: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.755029] Object de8580a8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.763628] Object de8580b8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.772229] Object de8580c8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.780829] Object de8580d8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.789430] Object de8580e8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 73 b0 ea 9f kkkkkkkkkkkks... [ 2.798030] Object de8580f8: e8 18 80 d6 56 38 44 c0 db e3 4f 71 f7 ce d1 d3 ....V8D...Oq.... [ 2.806629] Redzone de858108: f8 bd 3e 4f ..>O [ 2.814279] Padding de8581b0: 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ [ 2.822283] CPU: 0 PID: 0 Comm: swapper Tainted: G B 4.9.50-g995be12679 #179 [ 2.831819] Call Trace: [ 2.834301] [dffefd20] [c01aa9a8] check_bytes_and_report+0x100/0x194 (unreliable) [ 2.841801] [dffefd50] [c01aac3c] check_object+0x200/0x530 [ 2.847306] [dffefd80] [c01ae584] free_debug_processing+0x290/0x690 [ 2.853585] [dffefde0] [c01aec8c] __slab_free+0x308/0x628 [ 2.859000] [dffefe80] [c05057f4] ipsec_esp_encrypt_done+0x130/0x240 [ 2.865378] [dffefeb0] [c05002c4] flush_channel+0x1dc/0x488 [ 2.870968] [dffeff10] [c05007a8] talitos2_done_4ch+0x30/0x200 [ 2.876814] [dffeff30] [c002fe38] tasklet_action+0xa0/0x13c [ 2.882399] [dffeff60] [c002f118] __do_softirq+0x148/0x6cc [ 2.887896] [dffeffd0] [c002f954] irq_exit+0xc0/0x124 [ 2.892968] [dffefff0] [c0013adc] call_do_irq+0x24/0x3c [ 2.898213] [c0d4be00] [c000757c] do_IRQ+0x78/0x108 [ 2.903113] [c0d4be30] [c0015c08] ret_from_except+0x0/0x14 [ 2.908634] --- interrupt: 501 at finish_task_switch+0x70/0x350 [ 2.908634] LR = finish_task_switch+0x58/0x350 [ 2.919327] [c0d4bf20] [c085e1d4] schedule+0x80/0x134 [ 2.924398] [c0d4bf50] [c085e2c0] schedule_preempt_disabled+0x38/0xc8 [ 2.930853] [c0d4bf60] [c007f064] cpu_startup_entry+0xe4/0x190 [ 2.936707] [c0d4bfb0] [c096c434] start_kernel+0x3f4/0x408 [ 2.942198] [c0d4bff0] [00003438] 0x3438 [ 2.946137] FIX dma-kmalloc-256: Restoring 0xde858108-0xde85810b=0xcc [ 2.946137] [ 2.954158] FIX dma-kmalloc-256: Object at 0xde858008 not freed This patch reworks the handling of the CICV out in order to properly handle all cases. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2017-10-06 21:04:35 +08:00
/* ICV data */
if (ret > 1) {
tbl_off += ret;
edesc->icv_ool = true;
sync_needed = true;
if (is_ipsec_esp) {
struct talitos_ptr *tbl_ptr = &edesc->link_tbl[tbl_off];
int offset = (edesc->src_nents + edesc->dst_nents + 2) *
sizeof(struct talitos_ptr) + authsize;
/* Add an entry to the link table for ICV data */
crypto: talitos - fix memory corruption on SEC2 On SEC2, when using the old descriptors type (hmac snoop no afeu) for doing IPsec, the CICV out pointeur points out of the allocated memory. [ 2.502554] ============================================================================= [ 2.510740] BUG dma-kmalloc-256 (Not tainted): Redzone overwritten [ 2.516907] ----------------------------------------------------------------------------- [ 2.516907] [ 2.526535] Disabling lock debugging due to kernel taint [ 2.531845] INFO: 0xde858108-0xde85810b. First byte 0xf8 instead of 0xcc [ 2.538549] INFO: Allocated in 0x806181a9 age=0 cpu=0 pid=58 [ 2.544229] __kmalloc+0x374/0x564 [ 2.547649] talitos_edesc_alloc+0x17c/0x48c [ 2.551929] aead_edesc_alloc+0x80/0x154 [ 2.555863] aead_encrypt+0x30/0xe0 [ 2.559368] __test_aead+0x5a0/0x1f3c [ 2.563042] test_aead+0x2c/0x110 [ 2.566371] alg_test_aead+0x5c/0xf4 [ 2.569958] alg_test+0x1dc/0x5a0 [ 2.573305] cryptomgr_test+0x50/0x70 [ 2.576984] kthread+0xd8/0x134 [ 2.580155] ret_from_kernel_thread+0x5c/0x64 [ 2.584534] INFO: Freed in ipsec_esp_encrypt_done+0x130/0x240 age=6 cpu=0 pid=0 [ 2.591839] ipsec_esp_encrypt_done+0x130/0x240 [ 2.596395] flush_channel+0x1dc/0x488 [ 2.600161] talitos2_done_4ch+0x30/0x200 [ 2.604185] tasklet_action+0xa0/0x13c [ 2.607948] __do_softirq+0x148/0x6cc [ 2.611623] irq_exit+0xc0/0x124 [ 2.614869] call_do_irq+0x24/0x3c [ 2.618292] do_IRQ+0x78/0x108 [ 2.621369] ret_from_except+0x0/0x14 [ 2.625055] finish_task_switch+0x58/0x350 [ 2.629165] schedule+0x80/0x134 [ 2.632409] schedule_preempt_disabled+0x38/0xc8 [ 2.637042] cpu_startup_entry+0xe4/0x190 [ 2.641074] start_kernel+0x3f4/0x408 [ 2.644741] 0x3438 [ 2.646857] INFO: Slab 0xdffbdb00 objects=9 used=1 fp=0xde8581c0 flags=0x0080 [ 2.653978] INFO: Object 0xde858008 @offset=8 fp=0xca4395df [ 2.653978] [ 2.661032] Redzone de858000: cc cc cc cc cc cc cc cc ........ [ 2.669029] Object de858008: 00 00 00 02 00 00 00 02 00 6b 6b 6b 1e 83 ea 28 .........kkk...( [ 2.677628] Object de858018: 00 00 00 70 1e 85 80 64 ff 73 1d 21 6b 6b 6b 6b ...p...d.s.!kkkk [ 2.686228] Object de858028: 00 20 00 00 1e 84 17 24 00 10 00 00 1e 85 70 00 . .....$......p. [ 2.694829] Object de858038: 00 18 00 00 1e 84 17 44 00 08 00 00 1e 83 ea 28 .......D.......( [ 2.703430] Object de858048: 00 80 00 00 1e 84 f0 00 00 80 00 00 1e 85 70 10 ..............p. [ 2.712030] Object de858058: 00 20 6b 00 1e 85 80 f4 6b 6b 6b 6b 00 80 02 00 . k.....kkkk.... [ 2.720629] Object de858068: 1e 84 f0 00 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b ....kkkkkkkkkkkk [ 2.729230] Object de858078: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.737830] Object de858088: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.746429] Object de858098: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.755029] Object de8580a8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.763628] Object de8580b8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.772229] Object de8580c8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.780829] Object de8580d8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.789430] Object de8580e8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 73 b0 ea 9f kkkkkkkkkkkks... [ 2.798030] Object de8580f8: e8 18 80 d6 56 38 44 c0 db e3 4f 71 f7 ce d1 d3 ....V8D...Oq.... [ 2.806629] Redzone de858108: f8 bd 3e 4f ..>O [ 2.814279] Padding de8581b0: 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ [ 2.822283] CPU: 0 PID: 0 Comm: swapper Tainted: G B 4.9.50-g995be12679 #179 [ 2.831819] Call Trace: [ 2.834301] [dffefd20] [c01aa9a8] check_bytes_and_report+0x100/0x194 (unreliable) [ 2.841801] [dffefd50] [c01aac3c] check_object+0x200/0x530 [ 2.847306] [dffefd80] [c01ae584] free_debug_processing+0x290/0x690 [ 2.853585] [dffefde0] [c01aec8c] __slab_free+0x308/0x628 [ 2.859000] [dffefe80] [c05057f4] ipsec_esp_encrypt_done+0x130/0x240 [ 2.865378] [dffefeb0] [c05002c4] flush_channel+0x1dc/0x488 [ 2.870968] [dffeff10] [c05007a8] talitos2_done_4ch+0x30/0x200 [ 2.876814] [dffeff30] [c002fe38] tasklet_action+0xa0/0x13c [ 2.882399] [dffeff60] [c002f118] __do_softirq+0x148/0x6cc [ 2.887896] [dffeffd0] [c002f954] irq_exit+0xc0/0x124 [ 2.892968] [dffefff0] [c0013adc] call_do_irq+0x24/0x3c [ 2.898213] [c0d4be00] [c000757c] do_IRQ+0x78/0x108 [ 2.903113] [c0d4be30] [c0015c08] ret_from_except+0x0/0x14 [ 2.908634] --- interrupt: 501 at finish_task_switch+0x70/0x350 [ 2.908634] LR = finish_task_switch+0x58/0x350 [ 2.919327] [c0d4bf20] [c085e1d4] schedule+0x80/0x134 [ 2.924398] [c0d4bf50] [c085e2c0] schedule_preempt_disabled+0x38/0xc8 [ 2.930853] [c0d4bf60] [c007f064] cpu_startup_entry+0xe4/0x190 [ 2.936707] [c0d4bfb0] [c096c434] start_kernel+0x3f4/0x408 [ 2.942198] [c0d4bff0] [00003438] 0x3438 [ 2.946137] FIX dma-kmalloc-256: Restoring 0xde858108-0xde85810b=0xcc [ 2.946137] [ 2.954158] FIX dma-kmalloc-256: Object at 0xde858008 not freed This patch reworks the handling of the CICV out in order to properly handle all cases. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2017-10-06 21:04:35 +08:00
to_talitos_ptr_ext_set(tbl_ptr - 1, 0, is_sec1);
to_talitos_ptr_ext_set(tbl_ptr, DESC_PTR_LNKTBL_RETURN,
is_sec1);
/* icv data follows link tables */
to_talitos_ptr(tbl_ptr, edesc->dma_link_tbl + offset,
authsize, is_sec1);
crypto: talitos - fix memory corruption on SEC2 On SEC2, when using the old descriptors type (hmac snoop no afeu) for doing IPsec, the CICV out pointeur points out of the allocated memory. [ 2.502554] ============================================================================= [ 2.510740] BUG dma-kmalloc-256 (Not tainted): Redzone overwritten [ 2.516907] ----------------------------------------------------------------------------- [ 2.516907] [ 2.526535] Disabling lock debugging due to kernel taint [ 2.531845] INFO: 0xde858108-0xde85810b. First byte 0xf8 instead of 0xcc [ 2.538549] INFO: Allocated in 0x806181a9 age=0 cpu=0 pid=58 [ 2.544229] __kmalloc+0x374/0x564 [ 2.547649] talitos_edesc_alloc+0x17c/0x48c [ 2.551929] aead_edesc_alloc+0x80/0x154 [ 2.555863] aead_encrypt+0x30/0xe0 [ 2.559368] __test_aead+0x5a0/0x1f3c [ 2.563042] test_aead+0x2c/0x110 [ 2.566371] alg_test_aead+0x5c/0xf4 [ 2.569958] alg_test+0x1dc/0x5a0 [ 2.573305] cryptomgr_test+0x50/0x70 [ 2.576984] kthread+0xd8/0x134 [ 2.580155] ret_from_kernel_thread+0x5c/0x64 [ 2.584534] INFO: Freed in ipsec_esp_encrypt_done+0x130/0x240 age=6 cpu=0 pid=0 [ 2.591839] ipsec_esp_encrypt_done+0x130/0x240 [ 2.596395] flush_channel+0x1dc/0x488 [ 2.600161] talitos2_done_4ch+0x30/0x200 [ 2.604185] tasklet_action+0xa0/0x13c [ 2.607948] __do_softirq+0x148/0x6cc [ 2.611623] irq_exit+0xc0/0x124 [ 2.614869] call_do_irq+0x24/0x3c [ 2.618292] do_IRQ+0x78/0x108 [ 2.621369] ret_from_except+0x0/0x14 [ 2.625055] finish_task_switch+0x58/0x350 [ 2.629165] schedule+0x80/0x134 [ 2.632409] schedule_preempt_disabled+0x38/0xc8 [ 2.637042] cpu_startup_entry+0xe4/0x190 [ 2.641074] start_kernel+0x3f4/0x408 [ 2.644741] 0x3438 [ 2.646857] INFO: Slab 0xdffbdb00 objects=9 used=1 fp=0xde8581c0 flags=0x0080 [ 2.653978] INFO: Object 0xde858008 @offset=8 fp=0xca4395df [ 2.653978] [ 2.661032] Redzone de858000: cc cc cc cc cc cc cc cc ........ [ 2.669029] Object de858008: 00 00 00 02 00 00 00 02 00 6b 6b 6b 1e 83 ea 28 .........kkk...( [ 2.677628] Object de858018: 00 00 00 70 1e 85 80 64 ff 73 1d 21 6b 6b 6b 6b ...p...d.s.!kkkk [ 2.686228] Object de858028: 00 20 00 00 1e 84 17 24 00 10 00 00 1e 85 70 00 . .....$......p. [ 2.694829] Object de858038: 00 18 00 00 1e 84 17 44 00 08 00 00 1e 83 ea 28 .......D.......( [ 2.703430] Object de858048: 00 80 00 00 1e 84 f0 00 00 80 00 00 1e 85 70 10 ..............p. [ 2.712030] Object de858058: 00 20 6b 00 1e 85 80 f4 6b 6b 6b 6b 00 80 02 00 . k.....kkkk.... [ 2.720629] Object de858068: 1e 84 f0 00 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b ....kkkkkkkkkkkk [ 2.729230] Object de858078: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.737830] Object de858088: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.746429] Object de858098: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.755029] Object de8580a8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.763628] Object de8580b8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.772229] Object de8580c8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.780829] Object de8580d8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.789430] Object de8580e8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 73 b0 ea 9f kkkkkkkkkkkks... [ 2.798030] Object de8580f8: e8 18 80 d6 56 38 44 c0 db e3 4f 71 f7 ce d1 d3 ....V8D...Oq.... [ 2.806629] Redzone de858108: f8 bd 3e 4f ..>O [ 2.814279] Padding de8581b0: 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ [ 2.822283] CPU: 0 PID: 0 Comm: swapper Tainted: G B 4.9.50-g995be12679 #179 [ 2.831819] Call Trace: [ 2.834301] [dffefd20] [c01aa9a8] check_bytes_and_report+0x100/0x194 (unreliable) [ 2.841801] [dffefd50] [c01aac3c] check_object+0x200/0x530 [ 2.847306] [dffefd80] [c01ae584] free_debug_processing+0x290/0x690 [ 2.853585] [dffefde0] [c01aec8c] __slab_free+0x308/0x628 [ 2.859000] [dffefe80] [c05057f4] ipsec_esp_encrypt_done+0x130/0x240 [ 2.865378] [dffefeb0] [c05002c4] flush_channel+0x1dc/0x488 [ 2.870968] [dffeff10] [c05007a8] talitos2_done_4ch+0x30/0x200 [ 2.876814] [dffeff30] [c002fe38] tasklet_action+0xa0/0x13c [ 2.882399] [dffeff60] [c002f118] __do_softirq+0x148/0x6cc [ 2.887896] [dffeffd0] [c002f954] irq_exit+0xc0/0x124 [ 2.892968] [dffefff0] [c0013adc] call_do_irq+0x24/0x3c [ 2.898213] [c0d4be00] [c000757c] do_IRQ+0x78/0x108 [ 2.903113] [c0d4be30] [c0015c08] ret_from_except+0x0/0x14 [ 2.908634] --- interrupt: 501 at finish_task_switch+0x70/0x350 [ 2.908634] LR = finish_task_switch+0x58/0x350 [ 2.919327] [c0d4bf20] [c085e1d4] schedule+0x80/0x134 [ 2.924398] [c0d4bf50] [c085e2c0] schedule_preempt_disabled+0x38/0xc8 [ 2.930853] [c0d4bf60] [c007f064] cpu_startup_entry+0xe4/0x190 [ 2.936707] [c0d4bfb0] [c096c434] start_kernel+0x3f4/0x408 [ 2.942198] [c0d4bff0] [00003438] 0x3438 [ 2.946137] FIX dma-kmalloc-256: Restoring 0xde858108-0xde85810b=0xcc [ 2.946137] [ 2.954158] FIX dma-kmalloc-256: Object at 0xde858008 not freed This patch reworks the handling of the CICV out in order to properly handle all cases. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2017-10-06 21:04:35 +08:00
} else {
dma_addr_t addr = edesc->dma_link_tbl;
if (is_sec1)
addr += areq->assoclen + cryptlen;
else
addr += sizeof(struct talitos_ptr) * tbl_off;
to_talitos_ptr(&desc->ptr[6], addr, authsize, is_sec1);
crypto: talitos - fix memory corruption on SEC2 On SEC2, when using the old descriptors type (hmac snoop no afeu) for doing IPsec, the CICV out pointeur points out of the allocated memory. [ 2.502554] ============================================================================= [ 2.510740] BUG dma-kmalloc-256 (Not tainted): Redzone overwritten [ 2.516907] ----------------------------------------------------------------------------- [ 2.516907] [ 2.526535] Disabling lock debugging due to kernel taint [ 2.531845] INFO: 0xde858108-0xde85810b. First byte 0xf8 instead of 0xcc [ 2.538549] INFO: Allocated in 0x806181a9 age=0 cpu=0 pid=58 [ 2.544229] __kmalloc+0x374/0x564 [ 2.547649] talitos_edesc_alloc+0x17c/0x48c [ 2.551929] aead_edesc_alloc+0x80/0x154 [ 2.555863] aead_encrypt+0x30/0xe0 [ 2.559368] __test_aead+0x5a0/0x1f3c [ 2.563042] test_aead+0x2c/0x110 [ 2.566371] alg_test_aead+0x5c/0xf4 [ 2.569958] alg_test+0x1dc/0x5a0 [ 2.573305] cryptomgr_test+0x50/0x70 [ 2.576984] kthread+0xd8/0x134 [ 2.580155] ret_from_kernel_thread+0x5c/0x64 [ 2.584534] INFO: Freed in ipsec_esp_encrypt_done+0x130/0x240 age=6 cpu=0 pid=0 [ 2.591839] ipsec_esp_encrypt_done+0x130/0x240 [ 2.596395] flush_channel+0x1dc/0x488 [ 2.600161] talitos2_done_4ch+0x30/0x200 [ 2.604185] tasklet_action+0xa0/0x13c [ 2.607948] __do_softirq+0x148/0x6cc [ 2.611623] irq_exit+0xc0/0x124 [ 2.614869] call_do_irq+0x24/0x3c [ 2.618292] do_IRQ+0x78/0x108 [ 2.621369] ret_from_except+0x0/0x14 [ 2.625055] finish_task_switch+0x58/0x350 [ 2.629165] schedule+0x80/0x134 [ 2.632409] schedule_preempt_disabled+0x38/0xc8 [ 2.637042] cpu_startup_entry+0xe4/0x190 [ 2.641074] start_kernel+0x3f4/0x408 [ 2.644741] 0x3438 [ 2.646857] INFO: Slab 0xdffbdb00 objects=9 used=1 fp=0xde8581c0 flags=0x0080 [ 2.653978] INFO: Object 0xde858008 @offset=8 fp=0xca4395df [ 2.653978] [ 2.661032] Redzone de858000: cc cc cc cc cc cc cc cc ........ [ 2.669029] Object de858008: 00 00 00 02 00 00 00 02 00 6b 6b 6b 1e 83 ea 28 .........kkk...( [ 2.677628] Object de858018: 00 00 00 70 1e 85 80 64 ff 73 1d 21 6b 6b 6b 6b ...p...d.s.!kkkk [ 2.686228] Object de858028: 00 20 00 00 1e 84 17 24 00 10 00 00 1e 85 70 00 . .....$......p. [ 2.694829] Object de858038: 00 18 00 00 1e 84 17 44 00 08 00 00 1e 83 ea 28 .......D.......( [ 2.703430] Object de858048: 00 80 00 00 1e 84 f0 00 00 80 00 00 1e 85 70 10 ..............p. [ 2.712030] Object de858058: 00 20 6b 00 1e 85 80 f4 6b 6b 6b 6b 00 80 02 00 . k.....kkkk.... [ 2.720629] Object de858068: 1e 84 f0 00 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b ....kkkkkkkkkkkk [ 2.729230] Object de858078: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.737830] Object de858088: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.746429] Object de858098: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.755029] Object de8580a8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.763628] Object de8580b8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.772229] Object de8580c8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.780829] Object de8580d8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.789430] Object de8580e8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 73 b0 ea 9f kkkkkkkkkkkks... [ 2.798030] Object de8580f8: e8 18 80 d6 56 38 44 c0 db e3 4f 71 f7 ce d1 d3 ....V8D...Oq.... [ 2.806629] Redzone de858108: f8 bd 3e 4f ..>O [ 2.814279] Padding de8581b0: 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ [ 2.822283] CPU: 0 PID: 0 Comm: swapper Tainted: G B 4.9.50-g995be12679 #179 [ 2.831819] Call Trace: [ 2.834301] [dffefd20] [c01aa9a8] check_bytes_and_report+0x100/0x194 (unreliable) [ 2.841801] [dffefd50] [c01aac3c] check_object+0x200/0x530 [ 2.847306] [dffefd80] [c01ae584] free_debug_processing+0x290/0x690 [ 2.853585] [dffefde0] [c01aec8c] __slab_free+0x308/0x628 [ 2.859000] [dffefe80] [c05057f4] ipsec_esp_encrypt_done+0x130/0x240 [ 2.865378] [dffefeb0] [c05002c4] flush_channel+0x1dc/0x488 [ 2.870968] [dffeff10] [c05007a8] talitos2_done_4ch+0x30/0x200 [ 2.876814] [dffeff30] [c002fe38] tasklet_action+0xa0/0x13c [ 2.882399] [dffeff60] [c002f118] __do_softirq+0x148/0x6cc [ 2.887896] [dffeffd0] [c002f954] irq_exit+0xc0/0x124 [ 2.892968] [dffefff0] [c0013adc] call_do_irq+0x24/0x3c [ 2.898213] [c0d4be00] [c000757c] do_IRQ+0x78/0x108 [ 2.903113] [c0d4be30] [c0015c08] ret_from_except+0x0/0x14 [ 2.908634] --- interrupt: 501 at finish_task_switch+0x70/0x350 [ 2.908634] LR = finish_task_switch+0x58/0x350 [ 2.919327] [c0d4bf20] [c085e1d4] schedule+0x80/0x134 [ 2.924398] [c0d4bf50] [c085e2c0] schedule_preempt_disabled+0x38/0xc8 [ 2.930853] [c0d4bf60] [c007f064] cpu_startup_entry+0xe4/0x190 [ 2.936707] [c0d4bfb0] [c096c434] start_kernel+0x3f4/0x408 [ 2.942198] [c0d4bff0] [00003438] 0x3438 [ 2.946137] FIX dma-kmalloc-256: Restoring 0xde858108-0xde85810b=0xcc [ 2.946137] [ 2.954158] FIX dma-kmalloc-256: Object at 0xde858008 not freed This patch reworks the handling of the CICV out in order to properly handle all cases. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2017-10-06 21:04:35 +08:00
}
} else if (!is_ipsec_esp) {
crypto: talitos - fix memory corruption on SEC2 On SEC2, when using the old descriptors type (hmac snoop no afeu) for doing IPsec, the CICV out pointeur points out of the allocated memory. [ 2.502554] ============================================================================= [ 2.510740] BUG dma-kmalloc-256 (Not tainted): Redzone overwritten [ 2.516907] ----------------------------------------------------------------------------- [ 2.516907] [ 2.526535] Disabling lock debugging due to kernel taint [ 2.531845] INFO: 0xde858108-0xde85810b. First byte 0xf8 instead of 0xcc [ 2.538549] INFO: Allocated in 0x806181a9 age=0 cpu=0 pid=58 [ 2.544229] __kmalloc+0x374/0x564 [ 2.547649] talitos_edesc_alloc+0x17c/0x48c [ 2.551929] aead_edesc_alloc+0x80/0x154 [ 2.555863] aead_encrypt+0x30/0xe0 [ 2.559368] __test_aead+0x5a0/0x1f3c [ 2.563042] test_aead+0x2c/0x110 [ 2.566371] alg_test_aead+0x5c/0xf4 [ 2.569958] alg_test+0x1dc/0x5a0 [ 2.573305] cryptomgr_test+0x50/0x70 [ 2.576984] kthread+0xd8/0x134 [ 2.580155] ret_from_kernel_thread+0x5c/0x64 [ 2.584534] INFO: Freed in ipsec_esp_encrypt_done+0x130/0x240 age=6 cpu=0 pid=0 [ 2.591839] ipsec_esp_encrypt_done+0x130/0x240 [ 2.596395] flush_channel+0x1dc/0x488 [ 2.600161] talitos2_done_4ch+0x30/0x200 [ 2.604185] tasklet_action+0xa0/0x13c [ 2.607948] __do_softirq+0x148/0x6cc [ 2.611623] irq_exit+0xc0/0x124 [ 2.614869] call_do_irq+0x24/0x3c [ 2.618292] do_IRQ+0x78/0x108 [ 2.621369] ret_from_except+0x0/0x14 [ 2.625055] finish_task_switch+0x58/0x350 [ 2.629165] schedule+0x80/0x134 [ 2.632409] schedule_preempt_disabled+0x38/0xc8 [ 2.637042] cpu_startup_entry+0xe4/0x190 [ 2.641074] start_kernel+0x3f4/0x408 [ 2.644741] 0x3438 [ 2.646857] INFO: Slab 0xdffbdb00 objects=9 used=1 fp=0xde8581c0 flags=0x0080 [ 2.653978] INFO: Object 0xde858008 @offset=8 fp=0xca4395df [ 2.653978] [ 2.661032] Redzone de858000: cc cc cc cc cc cc cc cc ........ [ 2.669029] Object de858008: 00 00 00 02 00 00 00 02 00 6b 6b 6b 1e 83 ea 28 .........kkk...( [ 2.677628] Object de858018: 00 00 00 70 1e 85 80 64 ff 73 1d 21 6b 6b 6b 6b ...p...d.s.!kkkk [ 2.686228] Object de858028: 00 20 00 00 1e 84 17 24 00 10 00 00 1e 85 70 00 . .....$......p. [ 2.694829] Object de858038: 00 18 00 00 1e 84 17 44 00 08 00 00 1e 83 ea 28 .......D.......( [ 2.703430] Object de858048: 00 80 00 00 1e 84 f0 00 00 80 00 00 1e 85 70 10 ..............p. [ 2.712030] Object de858058: 00 20 6b 00 1e 85 80 f4 6b 6b 6b 6b 00 80 02 00 . k.....kkkk.... [ 2.720629] Object de858068: 1e 84 f0 00 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b ....kkkkkkkkkkkk [ 2.729230] Object de858078: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.737830] Object de858088: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.746429] Object de858098: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.755029] Object de8580a8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.763628] Object de8580b8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.772229] Object de8580c8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.780829] Object de8580d8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.789430] Object de8580e8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 73 b0 ea 9f kkkkkkkkkkkks... [ 2.798030] Object de8580f8: e8 18 80 d6 56 38 44 c0 db e3 4f 71 f7 ce d1 d3 ....V8D...Oq.... [ 2.806629] Redzone de858108: f8 bd 3e 4f ..>O [ 2.814279] Padding de8581b0: 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ [ 2.822283] CPU: 0 PID: 0 Comm: swapper Tainted: G B 4.9.50-g995be12679 #179 [ 2.831819] Call Trace: [ 2.834301] [dffefd20] [c01aa9a8] check_bytes_and_report+0x100/0x194 (unreliable) [ 2.841801] [dffefd50] [c01aac3c] check_object+0x200/0x530 [ 2.847306] [dffefd80] [c01ae584] free_debug_processing+0x290/0x690 [ 2.853585] [dffefde0] [c01aec8c] __slab_free+0x308/0x628 [ 2.859000] [dffefe80] [c05057f4] ipsec_esp_encrypt_done+0x130/0x240 [ 2.865378] [dffefeb0] [c05002c4] flush_channel+0x1dc/0x488 [ 2.870968] [dffeff10] [c05007a8] talitos2_done_4ch+0x30/0x200 [ 2.876814] [dffeff30] [c002fe38] tasklet_action+0xa0/0x13c [ 2.882399] [dffeff60] [c002f118] __do_softirq+0x148/0x6cc [ 2.887896] [dffeffd0] [c002f954] irq_exit+0xc0/0x124 [ 2.892968] [dffefff0] [c0013adc] call_do_irq+0x24/0x3c [ 2.898213] [c0d4be00] [c000757c] do_IRQ+0x78/0x108 [ 2.903113] [c0d4be30] [c0015c08] ret_from_except+0x0/0x14 [ 2.908634] --- interrupt: 501 at finish_task_switch+0x70/0x350 [ 2.908634] LR = finish_task_switch+0x58/0x350 [ 2.919327] [c0d4bf20] [c085e1d4] schedule+0x80/0x134 [ 2.924398] [c0d4bf50] [c085e2c0] schedule_preempt_disabled+0x38/0xc8 [ 2.930853] [c0d4bf60] [c007f064] cpu_startup_entry+0xe4/0x190 [ 2.936707] [c0d4bfb0] [c096c434] start_kernel+0x3f4/0x408 [ 2.942198] [c0d4bff0] [00003438] 0x3438 [ 2.946137] FIX dma-kmalloc-256: Restoring 0xde858108-0xde85810b=0xcc [ 2.946137] [ 2.954158] FIX dma-kmalloc-256: Object at 0xde858008 not freed This patch reworks the handling of the CICV out in order to properly handle all cases. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2017-10-06 21:04:35 +08:00
ret = talitos_sg_map(dev, areq->dst, authsize, edesc,
&desc->ptr[6], sg_count, areq->assoclen +
cryptlen,
tbl_off);
if (ret > 1) {
tbl_off += ret;
edesc->icv_ool = true;
sync_needed = true;
} else {
edesc->icv_ool = false;
}
} else {
edesc->icv_ool = false;
}
/* iv out */
if (is_ipsec_esp)
map_single_talitos_ptr(dev, &desc->ptr[6], ivsize, ctx->iv,
DMA_FROM_DEVICE);
if (sync_needed)
dma_sync_single_for_device(dev, edesc->dma_link_tbl,
edesc->dma_len,
DMA_BIDIRECTIONAL);
ret = talitos_submit(dev, ctx->ch, desc, callback, areq);
if (ret != -EINPROGRESS) {
ipsec_esp_unmap(dev, edesc, areq);
kfree(edesc);
}
return ret;
}
/*
* allocate and map the extended descriptor
*/
static struct talitos_edesc *talitos_edesc_alloc(struct device *dev,
struct scatterlist *src,
struct scatterlist *dst,
u8 *iv,
unsigned int assoclen,
unsigned int cryptlen,
unsigned int authsize,
unsigned int ivsize,
int icv_stashing,
crypto: talitos - fix aead sglen for case 'dst != src' For aead case when source and destination buffers are different, there is an incorrect assumption that the source length includes the ICV length. Fix this, since it leads to an oops when using sg_count() to find the number of nents in the scatterlist: Unable to handle kernel paging request for data at address 0x00000004 Faulting instruction address: 0xf2265a28 Oops: Kernel access of bad area, sig: 11 [#1] SMP NR_CPUS=8 P2020 RDB Modules linked in: talitos(+) CPU: 1 PID: 2187 Comm: cryptomgr_test Not tainted 3.11.0 #12 task: c4e72e20 ti: ef634000 task.ti: ef634000 NIP: f2265a28 LR: f2266ad8 CTR: c000c900 REGS: ef635bb0 TRAP: 0300 Not tainted (3.11.0) MSR: 00029000 <CE,EE,ME> CR: 42042084 XER: 00000000 DEAR: 00000004, ESR: 00000000 GPR00: f2266e10 ef635c60 c4e72e20 00000001 00000014 ef635c69 00000001 c11f3082 GPR08: 00000010 00000000 00000002 2f635d58 22044084 00000000 00000000 c0755c80 GPR16: c4bf1000 ef784000 00000000 00000000 00000020 00000014 00000010 ef2f6100 GPR24: ef2f6200 00000024 ef143210 ef2f6000 00000000 ef635d58 00000000 2f635d58 NIP [f2265a28] sg_count+0x1c/0xb4 [talitos] LR [f2266ad8] talitos_edesc_alloc+0x12c/0x410 [talitos] Call Trace: [ef635c60] [c0552068] schedule_timeout+0x148/0x1ac (unreliable) [ef635cc0] [f2266e10] aead_edesc_alloc+0x54/0x64 [talitos] [ef635ce0] [f22680f0] aead_encrypt+0x24/0x70 [talitos] [ef635cf0] [c024b948] __test_aead+0x494/0xf68 [ef635e20] [c024d54c] test_aead+0x64/0xcc [ef635e40] [c024d604] alg_test_aead+0x50/0xc4 [ef635e60] [c024c838] alg_test+0x10c/0x2e4 [ef635ee0] [c0249d1c] cryptomgr_test+0x4c/0x54 [ef635ef0] [c005d598] kthread+0xa8/0xac [ef635f40] [c000e3bc] ret_from_kernel_thread+0x5c/0x64 Instruction dump: 81230024 552807fe 0f080000 5523003a 4bffff24 39000000 2c040000 99050000 408100a0 7c691b78 38c00001 38600000 <80e90004> 38630001 8109000c 70ea0002 ---[ end trace 4498123cd8478591 ]--- Signed-off-by: Horia Geanta <horia.geanta@freescale.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2013-11-28 21:11:17 +08:00
u32 cryptoflags,
bool encrypt)
{
struct talitos_edesc *edesc;
int src_nents, dst_nents, alloc_len, dma_len, src_len, dst_len;
dma_addr_t iv_dma = 0;
gfp_t flags = cryptoflags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
GFP_ATOMIC;
struct talitos_private *priv = dev_get_drvdata(dev);
bool is_sec1 = has_ftr_sec1(priv);
int max_len = is_sec1 ? TALITOS1_MAX_DATA_LEN : TALITOS2_MAX_DATA_LEN;
void *err;
if (cryptlen + authsize > max_len) {
dev_err(dev, "length exceeds h/w max limit\n");
return ERR_PTR(-EINVAL);
}
if (ivsize)
iv_dma = dma_map_single(dev, iv, ivsize, DMA_TO_DEVICE);
crypto: talitos - fix aead sglen for case 'dst != src' For aead case when source and destination buffers are different, there is an incorrect assumption that the source length includes the ICV length. Fix this, since it leads to an oops when using sg_count() to find the number of nents in the scatterlist: Unable to handle kernel paging request for data at address 0x00000004 Faulting instruction address: 0xf2265a28 Oops: Kernel access of bad area, sig: 11 [#1] SMP NR_CPUS=8 P2020 RDB Modules linked in: talitos(+) CPU: 1 PID: 2187 Comm: cryptomgr_test Not tainted 3.11.0 #12 task: c4e72e20 ti: ef634000 task.ti: ef634000 NIP: f2265a28 LR: f2266ad8 CTR: c000c900 REGS: ef635bb0 TRAP: 0300 Not tainted (3.11.0) MSR: 00029000 <CE,EE,ME> CR: 42042084 XER: 00000000 DEAR: 00000004, ESR: 00000000 GPR00: f2266e10 ef635c60 c4e72e20 00000001 00000014 ef635c69 00000001 c11f3082 GPR08: 00000010 00000000 00000002 2f635d58 22044084 00000000 00000000 c0755c80 GPR16: c4bf1000 ef784000 00000000 00000000 00000020 00000014 00000010 ef2f6100 GPR24: ef2f6200 00000024 ef143210 ef2f6000 00000000 ef635d58 00000000 2f635d58 NIP [f2265a28] sg_count+0x1c/0xb4 [talitos] LR [f2266ad8] talitos_edesc_alloc+0x12c/0x410 [talitos] Call Trace: [ef635c60] [c0552068] schedule_timeout+0x148/0x1ac (unreliable) [ef635cc0] [f2266e10] aead_edesc_alloc+0x54/0x64 [talitos] [ef635ce0] [f22680f0] aead_encrypt+0x24/0x70 [talitos] [ef635cf0] [c024b948] __test_aead+0x494/0xf68 [ef635e20] [c024d54c] test_aead+0x64/0xcc [ef635e40] [c024d604] alg_test_aead+0x50/0xc4 [ef635e60] [c024c838] alg_test+0x10c/0x2e4 [ef635ee0] [c0249d1c] cryptomgr_test+0x4c/0x54 [ef635ef0] [c005d598] kthread+0xa8/0xac [ef635f40] [c000e3bc] ret_from_kernel_thread+0x5c/0x64 Instruction dump: 81230024 552807fe 0f080000 5523003a 4bffff24 39000000 2c040000 99050000 408100a0 7c691b78 38c00001 38600000 <80e90004> 38630001 8109000c 70ea0002 ---[ end trace 4498123cd8478591 ]--- Signed-off-by: Horia Geanta <horia.geanta@freescale.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2013-11-28 21:11:17 +08:00
if (!dst || dst == src) {
src_len = assoclen + cryptlen + authsize;
src_nents = sg_nents_for_len(src, src_len);
if (src_nents < 0) {
dev_err(dev, "Invalid number of src SG.\n");
err = ERR_PTR(-EINVAL);
goto error_sg;
}
crypto: talitos - fix aead sglen for case 'dst != src' For aead case when source and destination buffers are different, there is an incorrect assumption that the source length includes the ICV length. Fix this, since it leads to an oops when using sg_count() to find the number of nents in the scatterlist: Unable to handle kernel paging request for data at address 0x00000004 Faulting instruction address: 0xf2265a28 Oops: Kernel access of bad area, sig: 11 [#1] SMP NR_CPUS=8 P2020 RDB Modules linked in: talitos(+) CPU: 1 PID: 2187 Comm: cryptomgr_test Not tainted 3.11.0 #12 task: c4e72e20 ti: ef634000 task.ti: ef634000 NIP: f2265a28 LR: f2266ad8 CTR: c000c900 REGS: ef635bb0 TRAP: 0300 Not tainted (3.11.0) MSR: 00029000 <CE,EE,ME> CR: 42042084 XER: 00000000 DEAR: 00000004, ESR: 00000000 GPR00: f2266e10 ef635c60 c4e72e20 00000001 00000014 ef635c69 00000001 c11f3082 GPR08: 00000010 00000000 00000002 2f635d58 22044084 00000000 00000000 c0755c80 GPR16: c4bf1000 ef784000 00000000 00000000 00000020 00000014 00000010 ef2f6100 GPR24: ef2f6200 00000024 ef143210 ef2f6000 00000000 ef635d58 00000000 2f635d58 NIP [f2265a28] sg_count+0x1c/0xb4 [talitos] LR [f2266ad8] talitos_edesc_alloc+0x12c/0x410 [talitos] Call Trace: [ef635c60] [c0552068] schedule_timeout+0x148/0x1ac (unreliable) [ef635cc0] [f2266e10] aead_edesc_alloc+0x54/0x64 [talitos] [ef635ce0] [f22680f0] aead_encrypt+0x24/0x70 [talitos] [ef635cf0] [c024b948] __test_aead+0x494/0xf68 [ef635e20] [c024d54c] test_aead+0x64/0xcc [ef635e40] [c024d604] alg_test_aead+0x50/0xc4 [ef635e60] [c024c838] alg_test+0x10c/0x2e4 [ef635ee0] [c0249d1c] cryptomgr_test+0x4c/0x54 [ef635ef0] [c005d598] kthread+0xa8/0xac [ef635f40] [c000e3bc] ret_from_kernel_thread+0x5c/0x64 Instruction dump: 81230024 552807fe 0f080000 5523003a 4bffff24 39000000 2c040000 99050000 408100a0 7c691b78 38c00001 38600000 <80e90004> 38630001 8109000c 70ea0002 ---[ end trace 4498123cd8478591 ]--- Signed-off-by: Horia Geanta <horia.geanta@freescale.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2013-11-28 21:11:17 +08:00
src_nents = (src_nents == 1) ? 0 : src_nents;
dst_nents = dst ? src_nents : 0;
dst_len = 0;
crypto: talitos - fix aead sglen for case 'dst != src' For aead case when source and destination buffers are different, there is an incorrect assumption that the source length includes the ICV length. Fix this, since it leads to an oops when using sg_count() to find the number of nents in the scatterlist: Unable to handle kernel paging request for data at address 0x00000004 Faulting instruction address: 0xf2265a28 Oops: Kernel access of bad area, sig: 11 [#1] SMP NR_CPUS=8 P2020 RDB Modules linked in: talitos(+) CPU: 1 PID: 2187 Comm: cryptomgr_test Not tainted 3.11.0 #12 task: c4e72e20 ti: ef634000 task.ti: ef634000 NIP: f2265a28 LR: f2266ad8 CTR: c000c900 REGS: ef635bb0 TRAP: 0300 Not tainted (3.11.0) MSR: 00029000 <CE,EE,ME> CR: 42042084 XER: 00000000 DEAR: 00000004, ESR: 00000000 GPR00: f2266e10 ef635c60 c4e72e20 00000001 00000014 ef635c69 00000001 c11f3082 GPR08: 00000010 00000000 00000002 2f635d58 22044084 00000000 00000000 c0755c80 GPR16: c4bf1000 ef784000 00000000 00000000 00000020 00000014 00000010 ef2f6100 GPR24: ef2f6200 00000024 ef143210 ef2f6000 00000000 ef635d58 00000000 2f635d58 NIP [f2265a28] sg_count+0x1c/0xb4 [talitos] LR [f2266ad8] talitos_edesc_alloc+0x12c/0x410 [talitos] Call Trace: [ef635c60] [c0552068] schedule_timeout+0x148/0x1ac (unreliable) [ef635cc0] [f2266e10] aead_edesc_alloc+0x54/0x64 [talitos] [ef635ce0] [f22680f0] aead_encrypt+0x24/0x70 [talitos] [ef635cf0] [c024b948] __test_aead+0x494/0xf68 [ef635e20] [c024d54c] test_aead+0x64/0xcc [ef635e40] [c024d604] alg_test_aead+0x50/0xc4 [ef635e60] [c024c838] alg_test+0x10c/0x2e4 [ef635ee0] [c0249d1c] cryptomgr_test+0x4c/0x54 [ef635ef0] [c005d598] kthread+0xa8/0xac [ef635f40] [c000e3bc] ret_from_kernel_thread+0x5c/0x64 Instruction dump: 81230024 552807fe 0f080000 5523003a 4bffff24 39000000 2c040000 99050000 408100a0 7c691b78 38c00001 38600000 <80e90004> 38630001 8109000c 70ea0002 ---[ end trace 4498123cd8478591 ]--- Signed-off-by: Horia Geanta <horia.geanta@freescale.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2013-11-28 21:11:17 +08:00
} else { /* dst && dst != src*/
src_len = assoclen + cryptlen + (encrypt ? 0 : authsize);
src_nents = sg_nents_for_len(src, src_len);
if (src_nents < 0) {
dev_err(dev, "Invalid number of src SG.\n");
err = ERR_PTR(-EINVAL);
goto error_sg;
}
crypto: talitos - fix aead sglen for case 'dst != src' For aead case when source and destination buffers are different, there is an incorrect assumption that the source length includes the ICV length. Fix this, since it leads to an oops when using sg_count() to find the number of nents in the scatterlist: Unable to handle kernel paging request for data at address 0x00000004 Faulting instruction address: 0xf2265a28 Oops: Kernel access of bad area, sig: 11 [#1] SMP NR_CPUS=8 P2020 RDB Modules linked in: talitos(+) CPU: 1 PID: 2187 Comm: cryptomgr_test Not tainted 3.11.0 #12 task: c4e72e20 ti: ef634000 task.ti: ef634000 NIP: f2265a28 LR: f2266ad8 CTR: c000c900 REGS: ef635bb0 TRAP: 0300 Not tainted (3.11.0) MSR: 00029000 <CE,EE,ME> CR: 42042084 XER: 00000000 DEAR: 00000004, ESR: 00000000 GPR00: f2266e10 ef635c60 c4e72e20 00000001 00000014 ef635c69 00000001 c11f3082 GPR08: 00000010 00000000 00000002 2f635d58 22044084 00000000 00000000 c0755c80 GPR16: c4bf1000 ef784000 00000000 00000000 00000020 00000014 00000010 ef2f6100 GPR24: ef2f6200 00000024 ef143210 ef2f6000 00000000 ef635d58 00000000 2f635d58 NIP [f2265a28] sg_count+0x1c/0xb4 [talitos] LR [f2266ad8] talitos_edesc_alloc+0x12c/0x410 [talitos] Call Trace: [ef635c60] [c0552068] schedule_timeout+0x148/0x1ac (unreliable) [ef635cc0] [f2266e10] aead_edesc_alloc+0x54/0x64 [talitos] [ef635ce0] [f22680f0] aead_encrypt+0x24/0x70 [talitos] [ef635cf0] [c024b948] __test_aead+0x494/0xf68 [ef635e20] [c024d54c] test_aead+0x64/0xcc [ef635e40] [c024d604] alg_test_aead+0x50/0xc4 [ef635e60] [c024c838] alg_test+0x10c/0x2e4 [ef635ee0] [c0249d1c] cryptomgr_test+0x4c/0x54 [ef635ef0] [c005d598] kthread+0xa8/0xac [ef635f40] [c000e3bc] ret_from_kernel_thread+0x5c/0x64 Instruction dump: 81230024 552807fe 0f080000 5523003a 4bffff24 39000000 2c040000 99050000 408100a0 7c691b78 38c00001 38600000 <80e90004> 38630001 8109000c 70ea0002 ---[ end trace 4498123cd8478591 ]--- Signed-off-by: Horia Geanta <horia.geanta@freescale.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2013-11-28 21:11:17 +08:00
src_nents = (src_nents == 1) ? 0 : src_nents;
dst_len = assoclen + cryptlen + (encrypt ? authsize : 0);
dst_nents = sg_nents_for_len(dst, dst_len);
if (dst_nents < 0) {
dev_err(dev, "Invalid number of dst SG.\n");
err = ERR_PTR(-EINVAL);
goto error_sg;
}
crypto: talitos - fix aead sglen for case 'dst != src' For aead case when source and destination buffers are different, there is an incorrect assumption that the source length includes the ICV length. Fix this, since it leads to an oops when using sg_count() to find the number of nents in the scatterlist: Unable to handle kernel paging request for data at address 0x00000004 Faulting instruction address: 0xf2265a28 Oops: Kernel access of bad area, sig: 11 [#1] SMP NR_CPUS=8 P2020 RDB Modules linked in: talitos(+) CPU: 1 PID: 2187 Comm: cryptomgr_test Not tainted 3.11.0 #12 task: c4e72e20 ti: ef634000 task.ti: ef634000 NIP: f2265a28 LR: f2266ad8 CTR: c000c900 REGS: ef635bb0 TRAP: 0300 Not tainted (3.11.0) MSR: 00029000 <CE,EE,ME> CR: 42042084 XER: 00000000 DEAR: 00000004, ESR: 00000000 GPR00: f2266e10 ef635c60 c4e72e20 00000001 00000014 ef635c69 00000001 c11f3082 GPR08: 00000010 00000000 00000002 2f635d58 22044084 00000000 00000000 c0755c80 GPR16: c4bf1000 ef784000 00000000 00000000 00000020 00000014 00000010 ef2f6100 GPR24: ef2f6200 00000024 ef143210 ef2f6000 00000000 ef635d58 00000000 2f635d58 NIP [f2265a28] sg_count+0x1c/0xb4 [talitos] LR [f2266ad8] talitos_edesc_alloc+0x12c/0x410 [talitos] Call Trace: [ef635c60] [c0552068] schedule_timeout+0x148/0x1ac (unreliable) [ef635cc0] [f2266e10] aead_edesc_alloc+0x54/0x64 [talitos] [ef635ce0] [f22680f0] aead_encrypt+0x24/0x70 [talitos] [ef635cf0] [c024b948] __test_aead+0x494/0xf68 [ef635e20] [c024d54c] test_aead+0x64/0xcc [ef635e40] [c024d604] alg_test_aead+0x50/0xc4 [ef635e60] [c024c838] alg_test+0x10c/0x2e4 [ef635ee0] [c0249d1c] cryptomgr_test+0x4c/0x54 [ef635ef0] [c005d598] kthread+0xa8/0xac [ef635f40] [c000e3bc] ret_from_kernel_thread+0x5c/0x64 Instruction dump: 81230024 552807fe 0f080000 5523003a 4bffff24 39000000 2c040000 99050000 408100a0 7c691b78 38c00001 38600000 <80e90004> 38630001 8109000c 70ea0002 ---[ end trace 4498123cd8478591 ]--- Signed-off-by: Horia Geanta <horia.geanta@freescale.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2013-11-28 21:11:17 +08:00
dst_nents = (dst_nents == 1) ? 0 : dst_nents;
}
/*
* allocate space for base edesc plus the link tables,
* allowing for two separate entries for AD and generated ICV (+ 2),
* and space for two sets of ICVs (stashed and generated)
*/
alloc_len = sizeof(struct talitos_edesc);
if (src_nents || dst_nents) {
if (is_sec1)
dma_len = (src_nents ? src_len : 0) +
(dst_nents ? dst_len : 0);
else
dma_len = (src_nents + dst_nents + 2) *
sizeof(struct talitos_ptr) + authsize * 2;
alloc_len += dma_len;
} else {
dma_len = 0;
alloc_len += icv_stashing ? authsize : 0;
}
crypto: talitos - chain in buffered data for ahash on SEC1 SEC1 doesn't support S/G in descriptors so for hash operations, the CPU has to build a buffer containing the buffered block and the incoming data. This generates a lot of memory copies which represents more than 50% of CPU time of a md5sum operation as shown below with a 'perf record'. |--86.24%-- kcapi_md_digest | | | |--86.18%-- _kcapi_common_vmsplice_chunk_fd | | | | | |--83.68%-- splice | | | | | | | |--83.59%-- ret_from_syscall | | | | | | | | | |--83.52%-- sys_splice | | | | | | | | | | | |--83.49%-- splice_from_pipe | | | | | | | | | | | | | |--83.04%-- __splice_from_pipe | | | | | | | | | | | | | | | |--80.67%-- pipe_to_sendpage | | | | | | | | | | | | | | | | | |--78.25%-- hash_sendpage | | | | | | | | | | | | | | | | | | | |--60.08%-- ahash_process_req | | | | | | | | | | | | | | | | | | | | | |--56.36%-- sg_copy_buffer | | | | | | | | | | | | | | | | | | | | | | | |--55.29%-- memcpy | | | | | | | | | | | | However, unlike SEC2+, SEC1 offers the possibility to chain descriptors. It is therefore possible to build a first descriptor pointing to the buffered data and a second descriptor pointing to the incoming data, hence avoiding the memory copy to a single buffer. With this patch, the time necessary for a md5sum on a 90Mbytes file is approximately 3 seconds. Without the patch it takes 6 seconds. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2017-10-06 21:05:06 +08:00
/* if its a ahash, add space for a second desc next to the first one */
if (is_sec1 && !dst)
alloc_len += sizeof(struct talitos_desc);
edesc = kmalloc(alloc_len, GFP_DMA | flags);
if (!edesc) {
dev_err(dev, "could not allocate edescriptor\n");
err = ERR_PTR(-ENOMEM);
goto error_sg;
}
memset(&edesc->desc, 0, sizeof(edesc->desc));
edesc->src_nents = src_nents;
edesc->dst_nents = dst_nents;
edesc->iv_dma = iv_dma;
edesc->dma_len = dma_len;
crypto: talitos - chain in buffered data for ahash on SEC1 SEC1 doesn't support S/G in descriptors so for hash operations, the CPU has to build a buffer containing the buffered block and the incoming data. This generates a lot of memory copies which represents more than 50% of CPU time of a md5sum operation as shown below with a 'perf record'. |--86.24%-- kcapi_md_digest | | | |--86.18%-- _kcapi_common_vmsplice_chunk_fd | | | | | |--83.68%-- splice | | | | | | | |--83.59%-- ret_from_syscall | | | | | | | | | |--83.52%-- sys_splice | | | | | | | | | | | |--83.49%-- splice_from_pipe | | | | | | | | | | | | | |--83.04%-- __splice_from_pipe | | | | | | | | | | | | | | | |--80.67%-- pipe_to_sendpage | | | | | | | | | | | | | | | | | |--78.25%-- hash_sendpage | | | | | | | | | | | | | | | | | | | |--60.08%-- ahash_process_req | | | | | | | | | | | | | | | | | | | | | |--56.36%-- sg_copy_buffer | | | | | | | | | | | | | | | | | | | | | | | |--55.29%-- memcpy | | | | | | | | | | | | However, unlike SEC2+, SEC1 offers the possibility to chain descriptors. It is therefore possible to build a first descriptor pointing to the buffered data and a second descriptor pointing to the incoming data, hence avoiding the memory copy to a single buffer. With this patch, the time necessary for a md5sum on a 90Mbytes file is approximately 3 seconds. Without the patch it takes 6 seconds. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2017-10-06 21:05:06 +08:00
if (dma_len) {
void *addr = &edesc->link_tbl[0];
if (is_sec1 && !dst)
addr += sizeof(struct talitos_desc);
edesc->dma_link_tbl = dma_map_single(dev, addr,
edesc->dma_len,
DMA_BIDIRECTIONAL);
crypto: talitos - chain in buffered data for ahash on SEC1 SEC1 doesn't support S/G in descriptors so for hash operations, the CPU has to build a buffer containing the buffered block and the incoming data. This generates a lot of memory copies which represents more than 50% of CPU time of a md5sum operation as shown below with a 'perf record'. |--86.24%-- kcapi_md_digest | | | |--86.18%-- _kcapi_common_vmsplice_chunk_fd | | | | | |--83.68%-- splice | | | | | | | |--83.59%-- ret_from_syscall | | | | | | | | | |--83.52%-- sys_splice | | | | | | | | | | | |--83.49%-- splice_from_pipe | | | | | | | | | | | | | |--83.04%-- __splice_from_pipe | | | | | | | | | | | | | | | |--80.67%-- pipe_to_sendpage | | | | | | | | | | | | | | | | | |--78.25%-- hash_sendpage | | | | | | | | | | | | | | | | | | | |--60.08%-- ahash_process_req | | | | | | | | | | | | | | | | | | | | | |--56.36%-- sg_copy_buffer | | | | | | | | | | | | | | | | | | | | | | | |--55.29%-- memcpy | | | | | | | | | | | | However, unlike SEC2+, SEC1 offers the possibility to chain descriptors. It is therefore possible to build a first descriptor pointing to the buffered data and a second descriptor pointing to the incoming data, hence avoiding the memory copy to a single buffer. With this patch, the time necessary for a md5sum on a 90Mbytes file is approximately 3 seconds. Without the patch it takes 6 seconds. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2017-10-06 21:05:06 +08:00
}
return edesc;
error_sg:
if (iv_dma)
dma_unmap_single(dev, iv_dma, ivsize, DMA_TO_DEVICE);
return err;
}
static struct talitos_edesc *aead_edesc_alloc(struct aead_request *areq, u8 *iv,
crypto: talitos - fix aead sglen for case 'dst != src' For aead case when source and destination buffers are different, there is an incorrect assumption that the source length includes the ICV length. Fix this, since it leads to an oops when using sg_count() to find the number of nents in the scatterlist: Unable to handle kernel paging request for data at address 0x00000004 Faulting instruction address: 0xf2265a28 Oops: Kernel access of bad area, sig: 11 [#1] SMP NR_CPUS=8 P2020 RDB Modules linked in: talitos(+) CPU: 1 PID: 2187 Comm: cryptomgr_test Not tainted 3.11.0 #12 task: c4e72e20 ti: ef634000 task.ti: ef634000 NIP: f2265a28 LR: f2266ad8 CTR: c000c900 REGS: ef635bb0 TRAP: 0300 Not tainted (3.11.0) MSR: 00029000 <CE,EE,ME> CR: 42042084 XER: 00000000 DEAR: 00000004, ESR: 00000000 GPR00: f2266e10 ef635c60 c4e72e20 00000001 00000014 ef635c69 00000001 c11f3082 GPR08: 00000010 00000000 00000002 2f635d58 22044084 00000000 00000000 c0755c80 GPR16: c4bf1000 ef784000 00000000 00000000 00000020 00000014 00000010 ef2f6100 GPR24: ef2f6200 00000024 ef143210 ef2f6000 00000000 ef635d58 00000000 2f635d58 NIP [f2265a28] sg_count+0x1c/0xb4 [talitos] LR [f2266ad8] talitos_edesc_alloc+0x12c/0x410 [talitos] Call Trace: [ef635c60] [c0552068] schedule_timeout+0x148/0x1ac (unreliable) [ef635cc0] [f2266e10] aead_edesc_alloc+0x54/0x64 [talitos] [ef635ce0] [f22680f0] aead_encrypt+0x24/0x70 [talitos] [ef635cf0] [c024b948] __test_aead+0x494/0xf68 [ef635e20] [c024d54c] test_aead+0x64/0xcc [ef635e40] [c024d604] alg_test_aead+0x50/0xc4 [ef635e60] [c024c838] alg_test+0x10c/0x2e4 [ef635ee0] [c0249d1c] cryptomgr_test+0x4c/0x54 [ef635ef0] [c005d598] kthread+0xa8/0xac [ef635f40] [c000e3bc] ret_from_kernel_thread+0x5c/0x64 Instruction dump: 81230024 552807fe 0f080000 5523003a 4bffff24 39000000 2c040000 99050000 408100a0 7c691b78 38c00001 38600000 <80e90004> 38630001 8109000c 70ea0002 ---[ end trace 4498123cd8478591 ]--- Signed-off-by: Horia Geanta <horia.geanta@freescale.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2013-11-28 21:11:17 +08:00
int icv_stashing, bool encrypt)
{
struct crypto_aead *authenc = crypto_aead_reqtfm(areq);
unsigned int authsize = crypto_aead_authsize(authenc);
struct talitos_ctx *ctx = crypto_aead_ctx(authenc);
unsigned int ivsize = crypto_aead_ivsize(authenc);
return talitos_edesc_alloc(ctx->dev, areq->src, areq->dst,
iv, areq->assoclen, areq->cryptlen,
authsize, ivsize, icv_stashing,
crypto: talitos - fix aead sglen for case 'dst != src' For aead case when source and destination buffers are different, there is an incorrect assumption that the source length includes the ICV length. Fix this, since it leads to an oops when using sg_count() to find the number of nents in the scatterlist: Unable to handle kernel paging request for data at address 0x00000004 Faulting instruction address: 0xf2265a28 Oops: Kernel access of bad area, sig: 11 [#1] SMP NR_CPUS=8 P2020 RDB Modules linked in: talitos(+) CPU: 1 PID: 2187 Comm: cryptomgr_test Not tainted 3.11.0 #12 task: c4e72e20 ti: ef634000 task.ti: ef634000 NIP: f2265a28 LR: f2266ad8 CTR: c000c900 REGS: ef635bb0 TRAP: 0300 Not tainted (3.11.0) MSR: 00029000 <CE,EE,ME> CR: 42042084 XER: 00000000 DEAR: 00000004, ESR: 00000000 GPR00: f2266e10 ef635c60 c4e72e20 00000001 00000014 ef635c69 00000001 c11f3082 GPR08: 00000010 00000000 00000002 2f635d58 22044084 00000000 00000000 c0755c80 GPR16: c4bf1000 ef784000 00000000 00000000 00000020 00000014 00000010 ef2f6100 GPR24: ef2f6200 00000024 ef143210 ef2f6000 00000000 ef635d58 00000000 2f635d58 NIP [f2265a28] sg_count+0x1c/0xb4 [talitos] LR [f2266ad8] talitos_edesc_alloc+0x12c/0x410 [talitos] Call Trace: [ef635c60] [c0552068] schedule_timeout+0x148/0x1ac (unreliable) [ef635cc0] [f2266e10] aead_edesc_alloc+0x54/0x64 [talitos] [ef635ce0] [f22680f0] aead_encrypt+0x24/0x70 [talitos] [ef635cf0] [c024b948] __test_aead+0x494/0xf68 [ef635e20] [c024d54c] test_aead+0x64/0xcc [ef635e40] [c024d604] alg_test_aead+0x50/0xc4 [ef635e60] [c024c838] alg_test+0x10c/0x2e4 [ef635ee0] [c0249d1c] cryptomgr_test+0x4c/0x54 [ef635ef0] [c005d598] kthread+0xa8/0xac [ef635f40] [c000e3bc] ret_from_kernel_thread+0x5c/0x64 Instruction dump: 81230024 552807fe 0f080000 5523003a 4bffff24 39000000 2c040000 99050000 408100a0 7c691b78 38c00001 38600000 <80e90004> 38630001 8109000c 70ea0002 ---[ end trace 4498123cd8478591 ]--- Signed-off-by: Horia Geanta <horia.geanta@freescale.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2013-11-28 21:11:17 +08:00
areq->base.flags, encrypt);
}
static int aead_encrypt(struct aead_request *req)
{
struct crypto_aead *authenc = crypto_aead_reqtfm(req);
struct talitos_ctx *ctx = crypto_aead_ctx(authenc);
struct talitos_edesc *edesc;
/* allocate extended descriptor */
crypto: talitos - fix aead sglen for case 'dst != src' For aead case when source and destination buffers are different, there is an incorrect assumption that the source length includes the ICV length. Fix this, since it leads to an oops when using sg_count() to find the number of nents in the scatterlist: Unable to handle kernel paging request for data at address 0x00000004 Faulting instruction address: 0xf2265a28 Oops: Kernel access of bad area, sig: 11 [#1] SMP NR_CPUS=8 P2020 RDB Modules linked in: talitos(+) CPU: 1 PID: 2187 Comm: cryptomgr_test Not tainted 3.11.0 #12 task: c4e72e20 ti: ef634000 task.ti: ef634000 NIP: f2265a28 LR: f2266ad8 CTR: c000c900 REGS: ef635bb0 TRAP: 0300 Not tainted (3.11.0) MSR: 00029000 <CE,EE,ME> CR: 42042084 XER: 00000000 DEAR: 00000004, ESR: 00000000 GPR00: f2266e10 ef635c60 c4e72e20 00000001 00000014 ef635c69 00000001 c11f3082 GPR08: 00000010 00000000 00000002 2f635d58 22044084 00000000 00000000 c0755c80 GPR16: c4bf1000 ef784000 00000000 00000000 00000020 00000014 00000010 ef2f6100 GPR24: ef2f6200 00000024 ef143210 ef2f6000 00000000 ef635d58 00000000 2f635d58 NIP [f2265a28] sg_count+0x1c/0xb4 [talitos] LR [f2266ad8] talitos_edesc_alloc+0x12c/0x410 [talitos] Call Trace: [ef635c60] [c0552068] schedule_timeout+0x148/0x1ac (unreliable) [ef635cc0] [f2266e10] aead_edesc_alloc+0x54/0x64 [talitos] [ef635ce0] [f22680f0] aead_encrypt+0x24/0x70 [talitos] [ef635cf0] [c024b948] __test_aead+0x494/0xf68 [ef635e20] [c024d54c] test_aead+0x64/0xcc [ef635e40] [c024d604] alg_test_aead+0x50/0xc4 [ef635e60] [c024c838] alg_test+0x10c/0x2e4 [ef635ee0] [c0249d1c] cryptomgr_test+0x4c/0x54 [ef635ef0] [c005d598] kthread+0xa8/0xac [ef635f40] [c000e3bc] ret_from_kernel_thread+0x5c/0x64 Instruction dump: 81230024 552807fe 0f080000 5523003a 4bffff24 39000000 2c040000 99050000 408100a0 7c691b78 38c00001 38600000 <80e90004> 38630001 8109000c 70ea0002 ---[ end trace 4498123cd8478591 ]--- Signed-off-by: Horia Geanta <horia.geanta@freescale.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2013-11-28 21:11:17 +08:00
edesc = aead_edesc_alloc(req, req->iv, 0, true);
if (IS_ERR(edesc))
return PTR_ERR(edesc);
/* set encrypt */
edesc->desc.hdr = ctx->desc_hdr_template | DESC_HDR_MODE0_ENCRYPT;
return ipsec_esp(edesc, req, ipsec_esp_encrypt_done);
}
static int aead_decrypt(struct aead_request *req)
{
struct crypto_aead *authenc = crypto_aead_reqtfm(req);
unsigned int authsize = crypto_aead_authsize(authenc);
struct talitos_ctx *ctx = crypto_aead_ctx(authenc);
struct talitos_private *priv = dev_get_drvdata(ctx->dev);
struct talitos_edesc *edesc;
struct scatterlist *sg;
void *icvdata;
req->cryptlen -= authsize;
/* allocate extended descriptor */
crypto: talitos - fix aead sglen for case 'dst != src' For aead case when source and destination buffers are different, there is an incorrect assumption that the source length includes the ICV length. Fix this, since it leads to an oops when using sg_count() to find the number of nents in the scatterlist: Unable to handle kernel paging request for data at address 0x00000004 Faulting instruction address: 0xf2265a28 Oops: Kernel access of bad area, sig: 11 [#1] SMP NR_CPUS=8 P2020 RDB Modules linked in: talitos(+) CPU: 1 PID: 2187 Comm: cryptomgr_test Not tainted 3.11.0 #12 task: c4e72e20 ti: ef634000 task.ti: ef634000 NIP: f2265a28 LR: f2266ad8 CTR: c000c900 REGS: ef635bb0 TRAP: 0300 Not tainted (3.11.0) MSR: 00029000 <CE,EE,ME> CR: 42042084 XER: 00000000 DEAR: 00000004, ESR: 00000000 GPR00: f2266e10 ef635c60 c4e72e20 00000001 00000014 ef635c69 00000001 c11f3082 GPR08: 00000010 00000000 00000002 2f635d58 22044084 00000000 00000000 c0755c80 GPR16: c4bf1000 ef784000 00000000 00000000 00000020 00000014 00000010 ef2f6100 GPR24: ef2f6200 00000024 ef143210 ef2f6000 00000000 ef635d58 00000000 2f635d58 NIP [f2265a28] sg_count+0x1c/0xb4 [talitos] LR [f2266ad8] talitos_edesc_alloc+0x12c/0x410 [talitos] Call Trace: [ef635c60] [c0552068] schedule_timeout+0x148/0x1ac (unreliable) [ef635cc0] [f2266e10] aead_edesc_alloc+0x54/0x64 [talitos] [ef635ce0] [f22680f0] aead_encrypt+0x24/0x70 [talitos] [ef635cf0] [c024b948] __test_aead+0x494/0xf68 [ef635e20] [c024d54c] test_aead+0x64/0xcc [ef635e40] [c024d604] alg_test_aead+0x50/0xc4 [ef635e60] [c024c838] alg_test+0x10c/0x2e4 [ef635ee0] [c0249d1c] cryptomgr_test+0x4c/0x54 [ef635ef0] [c005d598] kthread+0xa8/0xac [ef635f40] [c000e3bc] ret_from_kernel_thread+0x5c/0x64 Instruction dump: 81230024 552807fe 0f080000 5523003a 4bffff24 39000000 2c040000 99050000 408100a0 7c691b78 38c00001 38600000 <80e90004> 38630001 8109000c 70ea0002 ---[ end trace 4498123cd8478591 ]--- Signed-off-by: Horia Geanta <horia.geanta@freescale.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2013-11-28 21:11:17 +08:00
edesc = aead_edesc_alloc(req, req->iv, 1, false);
if (IS_ERR(edesc))
return PTR_ERR(edesc);
if ((priv->features & TALITOS_FTR_HW_AUTH_CHECK) &&
((!edesc->src_nents && !edesc->dst_nents) ||
priv->features & TALITOS_FTR_SRC_LINK_TBL_LEN_INCLUDES_EXTENT)) {
/* decrypt and check the ICV */
edesc->desc.hdr = ctx->desc_hdr_template |
DESC_HDR_DIR_INBOUND |
DESC_HDR_MODE1_MDEU_CICV;
/* reset integrity check result bits */
return ipsec_esp(edesc, req, ipsec_esp_decrypt_hwauth_done);
}
/* Have to check the ICV with software */
edesc->desc.hdr = ctx->desc_hdr_template | DESC_HDR_DIR_INBOUND;
/* stash incoming ICV for later cmp with ICV generated by the h/w */
if (edesc->dma_len)
icvdata = (char *)&edesc->link_tbl[edesc->src_nents +
edesc->dst_nents + 2];
else
icvdata = &edesc->link_tbl[0];
sg = sg_last(req->src, edesc->src_nents ? : 1);
memcpy(icvdata, (char *)sg_virt(sg) + sg->length - authsize, authsize);
return ipsec_esp(edesc, req, ipsec_esp_decrypt_swauth_done);
}
static int ablkcipher_setkey(struct crypto_ablkcipher *cipher,
const u8 *key, unsigned int keylen)
{
struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher);
struct device *dev = ctx->dev;
u32 tmp[DES_EXPKEY_WORDS];
if (keylen > TALITOS_MAX_KEY_SIZE) {
crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
if (unlikely(crypto_ablkcipher_get_flags(cipher) &
CRYPTO_TFM_REQ_WEAK_KEY) &&
!des_ekey(tmp, key)) {
crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_WEAK_KEY);
return -EINVAL;
}
if (ctx->keylen)
dma_unmap_single(dev, ctx->dma_key, ctx->keylen, DMA_TO_DEVICE);
memcpy(&ctx->key, key, keylen);
ctx->keylen = keylen;
ctx->dma_key = dma_map_single(dev, ctx->key, keylen, DMA_TO_DEVICE);
return 0;
}
static void common_nonsnoop_unmap(struct device *dev,
struct talitos_edesc *edesc,
struct ablkcipher_request *areq)
{
unmap_single_talitos_ptr(dev, &edesc->desc.ptr[5], DMA_FROM_DEVICE);
talitos_sg_unmap(dev, edesc, areq->src, areq->dst, areq->nbytes, 0);
unmap_single_talitos_ptr(dev, &edesc->desc.ptr[1], DMA_TO_DEVICE);
if (edesc->dma_len)
dma_unmap_single(dev, edesc->dma_link_tbl, edesc->dma_len,
DMA_BIDIRECTIONAL);
}
static void ablkcipher_done(struct device *dev,
struct talitos_desc *desc, void *context,
int err)
{
struct ablkcipher_request *areq = context;
struct talitos_edesc *edesc;
edesc = container_of(desc, struct talitos_edesc, desc);
common_nonsnoop_unmap(dev, edesc, areq);
kfree(edesc);
areq->base.complete(&areq->base, err);
}
static int common_nonsnoop(struct talitos_edesc *edesc,
struct ablkcipher_request *areq,
void (*callback) (struct device *dev,
struct talitos_desc *desc,
void *context, int error))
{
struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(areq);
struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher);
struct device *dev = ctx->dev;
struct talitos_desc *desc = &edesc->desc;
unsigned int cryptlen = areq->nbytes;
unsigned int ivsize = crypto_ablkcipher_ivsize(cipher);
int sg_count, ret;
bool sync_needed = false;
struct talitos_private *priv = dev_get_drvdata(dev);
bool is_sec1 = has_ftr_sec1(priv);
/* first DWORD empty */
/* cipher iv */
to_talitos_ptr(&desc->ptr[1], edesc->iv_dma, ivsize, is_sec1);
/* cipher key */
to_talitos_ptr(&desc->ptr[2], ctx->dma_key, ctx->keylen, is_sec1);
sg_count = edesc->src_nents ?: 1;
if (is_sec1 && sg_count > 1)
sg_copy_to_buffer(areq->src, sg_count, edesc->buf,
cryptlen);
else
sg_count = dma_map_sg(dev, areq->src, sg_count,
(areq->src == areq->dst) ?
DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
/*
* cipher in
*/
sg_count = talitos_sg_map(dev, areq->src, cryptlen, edesc,
&desc->ptr[3], sg_count, 0, 0);
if (sg_count > 1)
sync_needed = true;
/* cipher out */
if (areq->src != areq->dst) {
sg_count = edesc->dst_nents ? : 1;
if (!is_sec1 || sg_count == 1)
dma_map_sg(dev, areq->dst, sg_count, DMA_FROM_DEVICE);
}
ret = talitos_sg_map(dev, areq->dst, cryptlen, edesc, &desc->ptr[4],
sg_count, 0, (edesc->src_nents + 1));
if (ret > 1)
sync_needed = true;
/* iv out */
map_single_talitos_ptr(dev, &desc->ptr[5], ivsize, ctx->iv,
DMA_FROM_DEVICE);
/* last DWORD empty */
if (sync_needed)
dma_sync_single_for_device(dev, edesc->dma_link_tbl,
edesc->dma_len, DMA_BIDIRECTIONAL);
ret = talitos_submit(dev, ctx->ch, desc, callback, areq);
if (ret != -EINPROGRESS) {
common_nonsnoop_unmap(dev, edesc, areq);
kfree(edesc);
}
return ret;
}
static struct talitos_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request *
crypto: talitos - fix aead sglen for case 'dst != src' For aead case when source and destination buffers are different, there is an incorrect assumption that the source length includes the ICV length. Fix this, since it leads to an oops when using sg_count() to find the number of nents in the scatterlist: Unable to handle kernel paging request for data at address 0x00000004 Faulting instruction address: 0xf2265a28 Oops: Kernel access of bad area, sig: 11 [#1] SMP NR_CPUS=8 P2020 RDB Modules linked in: talitos(+) CPU: 1 PID: 2187 Comm: cryptomgr_test Not tainted 3.11.0 #12 task: c4e72e20 ti: ef634000 task.ti: ef634000 NIP: f2265a28 LR: f2266ad8 CTR: c000c900 REGS: ef635bb0 TRAP: 0300 Not tainted (3.11.0) MSR: 00029000 <CE,EE,ME> CR: 42042084 XER: 00000000 DEAR: 00000004, ESR: 00000000 GPR00: f2266e10 ef635c60 c4e72e20 00000001 00000014 ef635c69 00000001 c11f3082 GPR08: 00000010 00000000 00000002 2f635d58 22044084 00000000 00000000 c0755c80 GPR16: c4bf1000 ef784000 00000000 00000000 00000020 00000014 00000010 ef2f6100 GPR24: ef2f6200 00000024 ef143210 ef2f6000 00000000 ef635d58 00000000 2f635d58 NIP [f2265a28] sg_count+0x1c/0xb4 [talitos] LR [f2266ad8] talitos_edesc_alloc+0x12c/0x410 [talitos] Call Trace: [ef635c60] [c0552068] schedule_timeout+0x148/0x1ac (unreliable) [ef635cc0] [f2266e10] aead_edesc_alloc+0x54/0x64 [talitos] [ef635ce0] [f22680f0] aead_encrypt+0x24/0x70 [talitos] [ef635cf0] [c024b948] __test_aead+0x494/0xf68 [ef635e20] [c024d54c] test_aead+0x64/0xcc [ef635e40] [c024d604] alg_test_aead+0x50/0xc4 [ef635e60] [c024c838] alg_test+0x10c/0x2e4 [ef635ee0] [c0249d1c] cryptomgr_test+0x4c/0x54 [ef635ef0] [c005d598] kthread+0xa8/0xac [ef635f40] [c000e3bc] ret_from_kernel_thread+0x5c/0x64 Instruction dump: 81230024 552807fe 0f080000 5523003a 4bffff24 39000000 2c040000 99050000 408100a0 7c691b78 38c00001 38600000 <80e90004> 38630001 8109000c 70ea0002 ---[ end trace 4498123cd8478591 ]--- Signed-off-by: Horia Geanta <horia.geanta@freescale.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2013-11-28 21:11:17 +08:00
areq, bool encrypt)
{
struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(areq);
struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher);
unsigned int ivsize = crypto_ablkcipher_ivsize(cipher);
return talitos_edesc_alloc(ctx->dev, areq->src, areq->dst,
areq->info, 0, areq->nbytes, 0, ivsize, 0,
crypto: talitos - fix aead sglen for case 'dst != src' For aead case when source and destination buffers are different, there is an incorrect assumption that the source length includes the ICV length. Fix this, since it leads to an oops when using sg_count() to find the number of nents in the scatterlist: Unable to handle kernel paging request for data at address 0x00000004 Faulting instruction address: 0xf2265a28 Oops: Kernel access of bad area, sig: 11 [#1] SMP NR_CPUS=8 P2020 RDB Modules linked in: talitos(+) CPU: 1 PID: 2187 Comm: cryptomgr_test Not tainted 3.11.0 #12 task: c4e72e20 ti: ef634000 task.ti: ef634000 NIP: f2265a28 LR: f2266ad8 CTR: c000c900 REGS: ef635bb0 TRAP: 0300 Not tainted (3.11.0) MSR: 00029000 <CE,EE,ME> CR: 42042084 XER: 00000000 DEAR: 00000004, ESR: 00000000 GPR00: f2266e10 ef635c60 c4e72e20 00000001 00000014 ef635c69 00000001 c11f3082 GPR08: 00000010 00000000 00000002 2f635d58 22044084 00000000 00000000 c0755c80 GPR16: c4bf1000 ef784000 00000000 00000000 00000020 00000014 00000010 ef2f6100 GPR24: ef2f6200 00000024 ef143210 ef2f6000 00000000 ef635d58 00000000 2f635d58 NIP [f2265a28] sg_count+0x1c/0xb4 [talitos] LR [f2266ad8] talitos_edesc_alloc+0x12c/0x410 [talitos] Call Trace: [ef635c60] [c0552068] schedule_timeout+0x148/0x1ac (unreliable) [ef635cc0] [f2266e10] aead_edesc_alloc+0x54/0x64 [talitos] [ef635ce0] [f22680f0] aead_encrypt+0x24/0x70 [talitos] [ef635cf0] [c024b948] __test_aead+0x494/0xf68 [ef635e20] [c024d54c] test_aead+0x64/0xcc [ef635e40] [c024d604] alg_test_aead+0x50/0xc4 [ef635e60] [c024c838] alg_test+0x10c/0x2e4 [ef635ee0] [c0249d1c] cryptomgr_test+0x4c/0x54 [ef635ef0] [c005d598] kthread+0xa8/0xac [ef635f40] [c000e3bc] ret_from_kernel_thread+0x5c/0x64 Instruction dump: 81230024 552807fe 0f080000 5523003a 4bffff24 39000000 2c040000 99050000 408100a0 7c691b78 38c00001 38600000 <80e90004> 38630001 8109000c 70ea0002 ---[ end trace 4498123cd8478591 ]--- Signed-off-by: Horia Geanta <horia.geanta@freescale.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2013-11-28 21:11:17 +08:00
areq->base.flags, encrypt);
}
static int ablkcipher_encrypt(struct ablkcipher_request *areq)
{
struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(areq);
struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher);
struct talitos_edesc *edesc;
/* allocate extended descriptor */
crypto: talitos - fix aead sglen for case 'dst != src' For aead case when source and destination buffers are different, there is an incorrect assumption that the source length includes the ICV length. Fix this, since it leads to an oops when using sg_count() to find the number of nents in the scatterlist: Unable to handle kernel paging request for data at address 0x00000004 Faulting instruction address: 0xf2265a28 Oops: Kernel access of bad area, sig: 11 [#1] SMP NR_CPUS=8 P2020 RDB Modules linked in: talitos(+) CPU: 1 PID: 2187 Comm: cryptomgr_test Not tainted 3.11.0 #12 task: c4e72e20 ti: ef634000 task.ti: ef634000 NIP: f2265a28 LR: f2266ad8 CTR: c000c900 REGS: ef635bb0 TRAP: 0300 Not tainted (3.11.0) MSR: 00029000 <CE,EE,ME> CR: 42042084 XER: 00000000 DEAR: 00000004, ESR: 00000000 GPR00: f2266e10 ef635c60 c4e72e20 00000001 00000014 ef635c69 00000001 c11f3082 GPR08: 00000010 00000000 00000002 2f635d58 22044084 00000000 00000000 c0755c80 GPR16: c4bf1000 ef784000 00000000 00000000 00000020 00000014 00000010 ef2f6100 GPR24: ef2f6200 00000024 ef143210 ef2f6000 00000000 ef635d58 00000000 2f635d58 NIP [f2265a28] sg_count+0x1c/0xb4 [talitos] LR [f2266ad8] talitos_edesc_alloc+0x12c/0x410 [talitos] Call Trace: [ef635c60] [c0552068] schedule_timeout+0x148/0x1ac (unreliable) [ef635cc0] [f2266e10] aead_edesc_alloc+0x54/0x64 [talitos] [ef635ce0] [f22680f0] aead_encrypt+0x24/0x70 [talitos] [ef635cf0] [c024b948] __test_aead+0x494/0xf68 [ef635e20] [c024d54c] test_aead+0x64/0xcc [ef635e40] [c024d604] alg_test_aead+0x50/0xc4 [ef635e60] [c024c838] alg_test+0x10c/0x2e4 [ef635ee0] [c0249d1c] cryptomgr_test+0x4c/0x54 [ef635ef0] [c005d598] kthread+0xa8/0xac [ef635f40] [c000e3bc] ret_from_kernel_thread+0x5c/0x64 Instruction dump: 81230024 552807fe 0f080000 5523003a 4bffff24 39000000 2c040000 99050000 408100a0 7c691b78 38c00001 38600000 <80e90004> 38630001 8109000c 70ea0002 ---[ end trace 4498123cd8478591 ]--- Signed-off-by: Horia Geanta <horia.geanta@freescale.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2013-11-28 21:11:17 +08:00
edesc = ablkcipher_edesc_alloc(areq, true);
if (IS_ERR(edesc))
return PTR_ERR(edesc);
/* set encrypt */
edesc->desc.hdr = ctx->desc_hdr_template | DESC_HDR_MODE0_ENCRYPT;
return common_nonsnoop(edesc, areq, ablkcipher_done);
}
static int ablkcipher_decrypt(struct ablkcipher_request *areq)
{
struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(areq);
struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher);
struct talitos_edesc *edesc;
/* allocate extended descriptor */
crypto: talitos - fix aead sglen for case 'dst != src' For aead case when source and destination buffers are different, there is an incorrect assumption that the source length includes the ICV length. Fix this, since it leads to an oops when using sg_count() to find the number of nents in the scatterlist: Unable to handle kernel paging request for data at address 0x00000004 Faulting instruction address: 0xf2265a28 Oops: Kernel access of bad area, sig: 11 [#1] SMP NR_CPUS=8 P2020 RDB Modules linked in: talitos(+) CPU: 1 PID: 2187 Comm: cryptomgr_test Not tainted 3.11.0 #12 task: c4e72e20 ti: ef634000 task.ti: ef634000 NIP: f2265a28 LR: f2266ad8 CTR: c000c900 REGS: ef635bb0 TRAP: 0300 Not tainted (3.11.0) MSR: 00029000 <CE,EE,ME> CR: 42042084 XER: 00000000 DEAR: 00000004, ESR: 00000000 GPR00: f2266e10 ef635c60 c4e72e20 00000001 00000014 ef635c69 00000001 c11f3082 GPR08: 00000010 00000000 00000002 2f635d58 22044084 00000000 00000000 c0755c80 GPR16: c4bf1000 ef784000 00000000 00000000 00000020 00000014 00000010 ef2f6100 GPR24: ef2f6200 00000024 ef143210 ef2f6000 00000000 ef635d58 00000000 2f635d58 NIP [f2265a28] sg_count+0x1c/0xb4 [talitos] LR [f2266ad8] talitos_edesc_alloc+0x12c/0x410 [talitos] Call Trace: [ef635c60] [c0552068] schedule_timeout+0x148/0x1ac (unreliable) [ef635cc0] [f2266e10] aead_edesc_alloc+0x54/0x64 [talitos] [ef635ce0] [f22680f0] aead_encrypt+0x24/0x70 [talitos] [ef635cf0] [c024b948] __test_aead+0x494/0xf68 [ef635e20] [c024d54c] test_aead+0x64/0xcc [ef635e40] [c024d604] alg_test_aead+0x50/0xc4 [ef635e60] [c024c838] alg_test+0x10c/0x2e4 [ef635ee0] [c0249d1c] cryptomgr_test+0x4c/0x54 [ef635ef0] [c005d598] kthread+0xa8/0xac [ef635f40] [c000e3bc] ret_from_kernel_thread+0x5c/0x64 Instruction dump: 81230024 552807fe 0f080000 5523003a 4bffff24 39000000 2c040000 99050000 408100a0 7c691b78 38c00001 38600000 <80e90004> 38630001 8109000c 70ea0002 ---[ end trace 4498123cd8478591 ]--- Signed-off-by: Horia Geanta <horia.geanta@freescale.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2013-11-28 21:11:17 +08:00
edesc = ablkcipher_edesc_alloc(areq, false);
if (IS_ERR(edesc))
return PTR_ERR(edesc);
edesc->desc.hdr = ctx->desc_hdr_template | DESC_HDR_DIR_INBOUND;
return common_nonsnoop(edesc, areq, ablkcipher_done);
}
static void common_nonsnoop_hash_unmap(struct device *dev,
struct talitos_edesc *edesc,
struct ahash_request *areq)
{
struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
talitos_sg_unmap(dev, edesc, req_ctx->psrc, NULL, 0, 0);
if (edesc->dma_len)
dma_unmap_single(dev, edesc->dma_link_tbl, edesc->dma_len,
DMA_BIDIRECTIONAL);
crypto: talitos - chain in buffered data for ahash on SEC1 SEC1 doesn't support S/G in descriptors so for hash operations, the CPU has to build a buffer containing the buffered block and the incoming data. This generates a lot of memory copies which represents more than 50% of CPU time of a md5sum operation as shown below with a 'perf record'. |--86.24%-- kcapi_md_digest | | | |--86.18%-- _kcapi_common_vmsplice_chunk_fd | | | | | |--83.68%-- splice | | | | | | | |--83.59%-- ret_from_syscall | | | | | | | | | |--83.52%-- sys_splice | | | | | | | | | | | |--83.49%-- splice_from_pipe | | | | | | | | | | | | | |--83.04%-- __splice_from_pipe | | | | | | | | | | | | | | | |--80.67%-- pipe_to_sendpage | | | | | | | | | | | | | | | | | |--78.25%-- hash_sendpage | | | | | | | | | | | | | | | | | | | |--60.08%-- ahash_process_req | | | | | | | | | | | | | | | | | | | | | |--56.36%-- sg_copy_buffer | | | | | | | | | | | | | | | | | | | | | | | |--55.29%-- memcpy | | | | | | | | | | | | However, unlike SEC2+, SEC1 offers the possibility to chain descriptors. It is therefore possible to build a first descriptor pointing to the buffered data and a second descriptor pointing to the incoming data, hence avoiding the memory copy to a single buffer. With this patch, the time necessary for a md5sum on a 90Mbytes file is approximately 3 seconds. Without the patch it takes 6 seconds. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2017-10-06 21:05:06 +08:00
if (edesc->desc.next_desc)
dma_unmap_single(dev, be32_to_cpu(edesc->desc.next_desc),
TALITOS_DESC_SIZE, DMA_BIDIRECTIONAL);
}
static void ahash_done(struct device *dev,
struct talitos_desc *desc, void *context,
int err)
{
struct ahash_request *areq = context;
struct talitos_edesc *edesc =
container_of(desc, struct talitos_edesc, desc);
struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
if (!req_ctx->last && req_ctx->to_hash_later) {
/* Position any partial block for next update/final/finup */
req_ctx->buf_idx = (req_ctx->buf_idx + 1) & 1;
req_ctx->nbuf = req_ctx->to_hash_later;
}
common_nonsnoop_hash_unmap(dev, edesc, areq);
kfree(edesc);
areq->base.complete(&areq->base, err);
}
/*
* SEC1 doesn't like hashing of 0 sized message, so we do the padding
* ourself and submit a padded block
*/
static void talitos_handle_buggy_hash(struct talitos_ctx *ctx,
struct talitos_edesc *edesc,
struct talitos_ptr *ptr)
{
static u8 padded_hash[64] = {
0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
pr_err_once("Bug in SEC1, padding ourself\n");
edesc->desc.hdr &= ~DESC_HDR_MODE0_MDEU_PAD;
map_single_talitos_ptr(ctx->dev, ptr, sizeof(padded_hash),
(char *)padded_hash, DMA_TO_DEVICE);
}
static int common_nonsnoop_hash(struct talitos_edesc *edesc,
struct ahash_request *areq, unsigned int length,
crypto: talitos - chain in buffered data for ahash on SEC1 SEC1 doesn't support S/G in descriptors so for hash operations, the CPU has to build a buffer containing the buffered block and the incoming data. This generates a lot of memory copies which represents more than 50% of CPU time of a md5sum operation as shown below with a 'perf record'. |--86.24%-- kcapi_md_digest | | | |--86.18%-- _kcapi_common_vmsplice_chunk_fd | | | | | |--83.68%-- splice | | | | | | | |--83.59%-- ret_from_syscall | | | | | | | | | |--83.52%-- sys_splice | | | | | | | | | | | |--83.49%-- splice_from_pipe | | | | | | | | | | | | | |--83.04%-- __splice_from_pipe | | | | | | | | | | | | | | | |--80.67%-- pipe_to_sendpage | | | | | | | | | | | | | | | | | |--78.25%-- hash_sendpage | | | | | | | | | | | | | | | | | | | |--60.08%-- ahash_process_req | | | | | | | | | | | | | | | | | | | | | |--56.36%-- sg_copy_buffer | | | | | | | | | | | | | | | | | | | | | | | |--55.29%-- memcpy | | | | | | | | | | | | However, unlike SEC2+, SEC1 offers the possibility to chain descriptors. It is therefore possible to build a first descriptor pointing to the buffered data and a second descriptor pointing to the incoming data, hence avoiding the memory copy to a single buffer. With this patch, the time necessary for a md5sum on a 90Mbytes file is approximately 3 seconds. Without the patch it takes 6 seconds. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2017-10-06 21:05:06 +08:00
unsigned int offset,
void (*callback) (struct device *dev,
struct talitos_desc *desc,
void *context, int error))
{
struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
struct talitos_ctx *ctx = crypto_ahash_ctx(tfm);
struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
struct device *dev = ctx->dev;
struct talitos_desc *desc = &edesc->desc;
int ret;
bool sync_needed = false;
struct talitos_private *priv = dev_get_drvdata(dev);
bool is_sec1 = has_ftr_sec1(priv);
int sg_count;
/* first DWORD empty */
/* hash context in */
if (!req_ctx->first || req_ctx->swinit) {
to_talitos_ptr(&desc->ptr[1], ctx->dma_hw_context,
req_ctx->hw_context_size, is_sec1);
req_ctx->swinit = 0;
}
/* Indicate next op is not the first. */
req_ctx->first = 0;
/* HMAC key */
if (ctx->keylen)
to_talitos_ptr(&desc->ptr[2], ctx->dma_key, ctx->keylen,
is_sec1);
crypto: talitos - chain in buffered data for ahash on SEC1 SEC1 doesn't support S/G in descriptors so for hash operations, the CPU has to build a buffer containing the buffered block and the incoming data. This generates a lot of memory copies which represents more than 50% of CPU time of a md5sum operation as shown below with a 'perf record'. |--86.24%-- kcapi_md_digest | | | |--86.18%-- _kcapi_common_vmsplice_chunk_fd | | | | | |--83.68%-- splice | | | | | | | |--83.59%-- ret_from_syscall | | | | | | | | | |--83.52%-- sys_splice | | | | | | | | | | | |--83.49%-- splice_from_pipe | | | | | | | | | | | | | |--83.04%-- __splice_from_pipe | | | | | | | | | | | | | | | |--80.67%-- pipe_to_sendpage | | | | | | | | | | | | | | | | | |--78.25%-- hash_sendpage | | | | | | | | | | | | | | | | | | | |--60.08%-- ahash_process_req | | | | | | | | | | | | | | | | | | | | | |--56.36%-- sg_copy_buffer | | | | | | | | | | | | | | | | | | | | | | | |--55.29%-- memcpy | | | | | | | | | | | | However, unlike SEC2+, SEC1 offers the possibility to chain descriptors. It is therefore possible to build a first descriptor pointing to the buffered data and a second descriptor pointing to the incoming data, hence avoiding the memory copy to a single buffer. With this patch, the time necessary for a md5sum on a 90Mbytes file is approximately 3 seconds. Without the patch it takes 6 seconds. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2017-10-06 21:05:06 +08:00
if (is_sec1 && req_ctx->nbuf)
length -= req_ctx->nbuf;
sg_count = edesc->src_nents ?: 1;
if (is_sec1 && sg_count > 1)
crypto: talitos - chain in buffered data for ahash on SEC1 SEC1 doesn't support S/G in descriptors so for hash operations, the CPU has to build a buffer containing the buffered block and the incoming data. This generates a lot of memory copies which represents more than 50% of CPU time of a md5sum operation as shown below with a 'perf record'. |--86.24%-- kcapi_md_digest | | | |--86.18%-- _kcapi_common_vmsplice_chunk_fd | | | | | |--83.68%-- splice | | | | | | | |--83.59%-- ret_from_syscall | | | | | | | | | |--83.52%-- sys_splice | | | | | | | | | | | |--83.49%-- splice_from_pipe | | | | | | | | | | | | | |--83.04%-- __splice_from_pipe | | | | | | | | | | | | | | | |--80.67%-- pipe_to_sendpage | | | | | | | | | | | | | | | | | |--78.25%-- hash_sendpage | | | | | | | | | | | | | | | | | | | |--60.08%-- ahash_process_req | | | | | | | | | | | | | | | | | | | | | |--56.36%-- sg_copy_buffer | | | | | | | | | | | | | | | | | | | | | | | |--55.29%-- memcpy | | | | | | | | | | | | However, unlike SEC2+, SEC1 offers the possibility to chain descriptors. It is therefore possible to build a first descriptor pointing to the buffered data and a second descriptor pointing to the incoming data, hence avoiding the memory copy to a single buffer. With this patch, the time necessary for a md5sum on a 90Mbytes file is approximately 3 seconds. Without the patch it takes 6 seconds. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2017-10-06 21:05:06 +08:00
sg_pcopy_to_buffer(req_ctx->psrc, sg_count,
edesc->buf + sizeof(struct talitos_desc),
length, req_ctx->nbuf);
else if (length)
sg_count = dma_map_sg(dev, req_ctx->psrc, sg_count,
DMA_TO_DEVICE);
/*
* data in
*/
crypto: talitos - chain in buffered data for ahash on SEC1 SEC1 doesn't support S/G in descriptors so for hash operations, the CPU has to build a buffer containing the buffered block and the incoming data. This generates a lot of memory copies which represents more than 50% of CPU time of a md5sum operation as shown below with a 'perf record'. |--86.24%-- kcapi_md_digest | | | |--86.18%-- _kcapi_common_vmsplice_chunk_fd | | | | | |--83.68%-- splice | | | | | | | |--83.59%-- ret_from_syscall | | | | | | | | | |--83.52%-- sys_splice | | | | | | | | | | | |--83.49%-- splice_from_pipe | | | | | | | | | | | | | |--83.04%-- __splice_from_pipe | | | | | | | | | | | | | | | |--80.67%-- pipe_to_sendpage | | | | | | | | | | | | | | | | | |--78.25%-- hash_sendpage | | | | | | | | | | | | | | | | | | | |--60.08%-- ahash_process_req | | | | | | | | | | | | | | | | | | | | | |--56.36%-- sg_copy_buffer | | | | | | | | | | | | | | | | | | | | | | | |--55.29%-- memcpy | | | | | | | | | | | | However, unlike SEC2+, SEC1 offers the possibility to chain descriptors. It is therefore possible to build a first descriptor pointing to the buffered data and a second descriptor pointing to the incoming data, hence avoiding the memory copy to a single buffer. With this patch, the time necessary for a md5sum on a 90Mbytes file is approximately 3 seconds. Without the patch it takes 6 seconds. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2017-10-06 21:05:06 +08:00
if (is_sec1 && req_ctx->nbuf) {
dma_addr_t dma_buf = ctx->dma_buf + req_ctx->buf_idx *
HASH_MAX_BLOCK_SIZE;
to_talitos_ptr(&desc->ptr[3], dma_buf, req_ctx->nbuf, is_sec1);
crypto: talitos - chain in buffered data for ahash on SEC1 SEC1 doesn't support S/G in descriptors so for hash operations, the CPU has to build a buffer containing the buffered block and the incoming data. This generates a lot of memory copies which represents more than 50% of CPU time of a md5sum operation as shown below with a 'perf record'. |--86.24%-- kcapi_md_digest | | | |--86.18%-- _kcapi_common_vmsplice_chunk_fd | | | | | |--83.68%-- splice | | | | | | | |--83.59%-- ret_from_syscall | | | | | | | | | |--83.52%-- sys_splice | | | | | | | | | | | |--83.49%-- splice_from_pipe | | | | | | | | | | | | | |--83.04%-- __splice_from_pipe | | | | | | | | | | | | | | | |--80.67%-- pipe_to_sendpage | | | | | | | | | | | | | | | | | |--78.25%-- hash_sendpage | | | | | | | | | | | | | | | | | | | |--60.08%-- ahash_process_req | | | | | | | | | | | | | | | | | | | | | |--56.36%-- sg_copy_buffer | | | | | | | | | | | | | | | | | | | | | | | |--55.29%-- memcpy | | | | | | | | | | | | However, unlike SEC2+, SEC1 offers the possibility to chain descriptors. It is therefore possible to build a first descriptor pointing to the buffered data and a second descriptor pointing to the incoming data, hence avoiding the memory copy to a single buffer. With this patch, the time necessary for a md5sum on a 90Mbytes file is approximately 3 seconds. Without the patch it takes 6 seconds. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2017-10-06 21:05:06 +08:00
} else {
sg_count = talitos_sg_map(dev, req_ctx->psrc, length, edesc,
&desc->ptr[3], sg_count, offset, 0);
if (sg_count > 1)
sync_needed = true;
}
/* fifth DWORD empty */
/* hash/HMAC out -or- hash context out */
if (req_ctx->last)
map_single_talitos_ptr(dev, &desc->ptr[5],
crypto_ahash_digestsize(tfm),
areq->result, DMA_FROM_DEVICE);
else
to_talitos_ptr(&desc->ptr[5], ctx->dma_hw_context,
req_ctx->hw_context_size, is_sec1);
/* last DWORD empty */
if (is_sec1 && from_talitos_ptr_len(&desc->ptr[3], true) == 0)
talitos_handle_buggy_hash(ctx, edesc, &desc->ptr[3]);
crypto: talitos - chain in buffered data for ahash on SEC1 SEC1 doesn't support S/G in descriptors so for hash operations, the CPU has to build a buffer containing the buffered block and the incoming data. This generates a lot of memory copies which represents more than 50% of CPU time of a md5sum operation as shown below with a 'perf record'. |--86.24%-- kcapi_md_digest | | | |--86.18%-- _kcapi_common_vmsplice_chunk_fd | | | | | |--83.68%-- splice | | | | | | | |--83.59%-- ret_from_syscall | | | | | | | | | |--83.52%-- sys_splice | | | | | | | | | | | |--83.49%-- splice_from_pipe | | | | | | | | | | | | | |--83.04%-- __splice_from_pipe | | | | | | | | | | | | | | | |--80.67%-- pipe_to_sendpage | | | | | | | | | | | | | | | | | |--78.25%-- hash_sendpage | | | | | | | | | | | | | | | | | | | |--60.08%-- ahash_process_req | | | | | | | | | | | | | | | | | | | | | |--56.36%-- sg_copy_buffer | | | | | | | | | | | | | | | | | | | | | | | |--55.29%-- memcpy | | | | | | | | | | | | However, unlike SEC2+, SEC1 offers the possibility to chain descriptors. It is therefore possible to build a first descriptor pointing to the buffered data and a second descriptor pointing to the incoming data, hence avoiding the memory copy to a single buffer. With this patch, the time necessary for a md5sum on a 90Mbytes file is approximately 3 seconds. Without the patch it takes 6 seconds. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2017-10-06 21:05:06 +08:00
if (is_sec1 && req_ctx->nbuf && length) {
struct talitos_desc *desc2 = desc + 1;
dma_addr_t next_desc;
memset(desc2, 0, sizeof(*desc2));
desc2->hdr = desc->hdr;
desc2->hdr &= ~DESC_HDR_MODE0_MDEU_INIT;
desc2->hdr1 = desc2->hdr;
desc->hdr &= ~DESC_HDR_MODE0_MDEU_PAD;
desc->hdr |= DESC_HDR_MODE0_MDEU_CONT;
desc->hdr &= ~DESC_HDR_DONE_NOTIFY;
to_talitos_ptr(&desc2->ptr[1], ctx->dma_hw_context,
req_ctx->hw_context_size, is_sec1);
copy_talitos_ptr(&desc2->ptr[2], &desc->ptr[2], is_sec1);
sg_count = talitos_sg_map(dev, req_ctx->psrc, length, edesc,
&desc2->ptr[3], sg_count, offset, 0);
if (sg_count > 1)
sync_needed = true;
copy_talitos_ptr(&desc2->ptr[5], &desc->ptr[5], is_sec1);
if (req_ctx->last)
to_talitos_ptr(&desc->ptr[5], ctx->dma_hw_context,
req_ctx->hw_context_size, is_sec1);
next_desc = dma_map_single(dev, &desc2->hdr1, TALITOS_DESC_SIZE,
DMA_BIDIRECTIONAL);
desc->next_desc = cpu_to_be32(next_desc);
}
if (sync_needed)
dma_sync_single_for_device(dev, edesc->dma_link_tbl,
edesc->dma_len, DMA_BIDIRECTIONAL);
ret = talitos_submit(dev, ctx->ch, desc, callback, areq);
if (ret != -EINPROGRESS) {
common_nonsnoop_hash_unmap(dev, edesc, areq);
kfree(edesc);
}
return ret;
}
static struct talitos_edesc *ahash_edesc_alloc(struct ahash_request *areq,
unsigned int nbytes)
{
struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
struct talitos_ctx *ctx = crypto_ahash_ctx(tfm);
struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
crypto: talitos - chain in buffered data for ahash on SEC1 SEC1 doesn't support S/G in descriptors so for hash operations, the CPU has to build a buffer containing the buffered block and the incoming data. This generates a lot of memory copies which represents more than 50% of CPU time of a md5sum operation as shown below with a 'perf record'. |--86.24%-- kcapi_md_digest | | | |--86.18%-- _kcapi_common_vmsplice_chunk_fd | | | | | |--83.68%-- splice | | | | | | | |--83.59%-- ret_from_syscall | | | | | | | | | |--83.52%-- sys_splice | | | | | | | | | | | |--83.49%-- splice_from_pipe | | | | | | | | | | | | | |--83.04%-- __splice_from_pipe | | | | | | | | | | | | | | | |--80.67%-- pipe_to_sendpage | | | | | | | | | | | | | | | | | |--78.25%-- hash_sendpage | | | | | | | | | | | | | | | | | | | |--60.08%-- ahash_process_req | | | | | | | | | | | | | | | | | | | | | |--56.36%-- sg_copy_buffer | | | | | | | | | | | | | | | | | | | | | | | |--55.29%-- memcpy | | | | | | | | | | | | However, unlike SEC2+, SEC1 offers the possibility to chain descriptors. It is therefore possible to build a first descriptor pointing to the buffered data and a second descriptor pointing to the incoming data, hence avoiding the memory copy to a single buffer. With this patch, the time necessary for a md5sum on a 90Mbytes file is approximately 3 seconds. Without the patch it takes 6 seconds. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2017-10-06 21:05:06 +08:00
struct talitos_private *priv = dev_get_drvdata(ctx->dev);
bool is_sec1 = has_ftr_sec1(priv);
if (is_sec1)
nbytes -= req_ctx->nbuf;
return talitos_edesc_alloc(ctx->dev, req_ctx->psrc, NULL, NULL, 0,
crypto: talitos - fix aead sglen for case 'dst != src' For aead case when source and destination buffers are different, there is an incorrect assumption that the source length includes the ICV length. Fix this, since it leads to an oops when using sg_count() to find the number of nents in the scatterlist: Unable to handle kernel paging request for data at address 0x00000004 Faulting instruction address: 0xf2265a28 Oops: Kernel access of bad area, sig: 11 [#1] SMP NR_CPUS=8 P2020 RDB Modules linked in: talitos(+) CPU: 1 PID: 2187 Comm: cryptomgr_test Not tainted 3.11.0 #12 task: c4e72e20 ti: ef634000 task.ti: ef634000 NIP: f2265a28 LR: f2266ad8 CTR: c000c900 REGS: ef635bb0 TRAP: 0300 Not tainted (3.11.0) MSR: 00029000 <CE,EE,ME> CR: 42042084 XER: 00000000 DEAR: 00000004, ESR: 00000000 GPR00: f2266e10 ef635c60 c4e72e20 00000001 00000014 ef635c69 00000001 c11f3082 GPR08: 00000010 00000000 00000002 2f635d58 22044084 00000000 00000000 c0755c80 GPR16: c4bf1000 ef784000 00000000 00000000 00000020 00000014 00000010 ef2f6100 GPR24: ef2f6200 00000024 ef143210 ef2f6000 00000000 ef635d58 00000000 2f635d58 NIP [f2265a28] sg_count+0x1c/0xb4 [talitos] LR [f2266ad8] talitos_edesc_alloc+0x12c/0x410 [talitos] Call Trace: [ef635c60] [c0552068] schedule_timeout+0x148/0x1ac (unreliable) [ef635cc0] [f2266e10] aead_edesc_alloc+0x54/0x64 [talitos] [ef635ce0] [f22680f0] aead_encrypt+0x24/0x70 [talitos] [ef635cf0] [c024b948] __test_aead+0x494/0xf68 [ef635e20] [c024d54c] test_aead+0x64/0xcc [ef635e40] [c024d604] alg_test_aead+0x50/0xc4 [ef635e60] [c024c838] alg_test+0x10c/0x2e4 [ef635ee0] [c0249d1c] cryptomgr_test+0x4c/0x54 [ef635ef0] [c005d598] kthread+0xa8/0xac [ef635f40] [c000e3bc] ret_from_kernel_thread+0x5c/0x64 Instruction dump: 81230024 552807fe 0f080000 5523003a 4bffff24 39000000 2c040000 99050000 408100a0 7c691b78 38c00001 38600000 <80e90004> 38630001 8109000c 70ea0002 ---[ end trace 4498123cd8478591 ]--- Signed-off-by: Horia Geanta <horia.geanta@freescale.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2013-11-28 21:11:17 +08:00
nbytes, 0, 0, 0, areq->base.flags, false);
}
static int ahash_init(struct ahash_request *areq)
{
struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
struct talitos_ctx *ctx = crypto_ahash_ctx(tfm);
struct device *dev = ctx->dev;
struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
unsigned int size;
crypto: talitos - chain in buffered data for ahash on SEC1 SEC1 doesn't support S/G in descriptors so for hash operations, the CPU has to build a buffer containing the buffered block and the incoming data. This generates a lot of memory copies which represents more than 50% of CPU time of a md5sum operation as shown below with a 'perf record'. |--86.24%-- kcapi_md_digest | | | |--86.18%-- _kcapi_common_vmsplice_chunk_fd | | | | | |--83.68%-- splice | | | | | | | |--83.59%-- ret_from_syscall | | | | | | | | | |--83.52%-- sys_splice | | | | | | | | | | | |--83.49%-- splice_from_pipe | | | | | | | | | | | | | |--83.04%-- __splice_from_pipe | | | | | | | | | | | | | | | |--80.67%-- pipe_to_sendpage | | | | | | | | | | | | | | | | | |--78.25%-- hash_sendpage | | | | | | | | | | | | | | | | | | | |--60.08%-- ahash_process_req | | | | | | | | | | | | | | | | | | | | | |--56.36%-- sg_copy_buffer | | | | | | | | | | | | | | | | | | | | | | | |--55.29%-- memcpy | | | | | | | | | | | | However, unlike SEC2+, SEC1 offers the possibility to chain descriptors. It is therefore possible to build a first descriptor pointing to the buffered data and a second descriptor pointing to the incoming data, hence avoiding the memory copy to a single buffer. With this patch, the time necessary for a md5sum on a 90Mbytes file is approximately 3 seconds. Without the patch it takes 6 seconds. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2017-10-06 21:05:06 +08:00
struct talitos_private *priv = dev_get_drvdata(dev);
bool is_sec1 = has_ftr_sec1(priv);
/* Initialize the context */
req_ctx->buf_idx = 0;
req_ctx->nbuf = 0;
req_ctx->first = 1; /* first indicates h/w must init its context */
req_ctx->swinit = 0; /* assume h/w init of context */
size = (crypto_ahash_digestsize(tfm) <= SHA256_DIGEST_SIZE)
? TALITOS_MDEU_CONTEXT_SIZE_MD5_SHA1_SHA256
: TALITOS_MDEU_CONTEXT_SIZE_SHA384_SHA512;
req_ctx->hw_context_size = size;
if (ctx->dma_hw_context)
dma_unmap_single(dev, ctx->dma_hw_context, size,
DMA_BIDIRECTIONAL);
ctx->dma_hw_context = dma_map_single(dev, req_ctx->hw_context, size,
DMA_BIDIRECTIONAL);
crypto: talitos - chain in buffered data for ahash on SEC1 SEC1 doesn't support S/G in descriptors so for hash operations, the CPU has to build a buffer containing the buffered block and the incoming data. This generates a lot of memory copies which represents more than 50% of CPU time of a md5sum operation as shown below with a 'perf record'. |--86.24%-- kcapi_md_digest | | | |--86.18%-- _kcapi_common_vmsplice_chunk_fd | | | | | |--83.68%-- splice | | | | | | | |--83.59%-- ret_from_syscall | | | | | | | | | |--83.52%-- sys_splice | | | | | | | | | | | |--83.49%-- splice_from_pipe | | | | | | | | | | | | | |--83.04%-- __splice_from_pipe | | | | | | | | | | | | | | | |--80.67%-- pipe_to_sendpage | | | | | | | | | | | | | | | | | |--78.25%-- hash_sendpage | | | | | | | | | | | | | | | | | | | |--60.08%-- ahash_process_req | | | | | | | | | | | | | | | | | | | | | |--56.36%-- sg_copy_buffer | | | | | | | | | | | | | | | | | | | | | | | |--55.29%-- memcpy | | | | | | | | | | | | However, unlike SEC2+, SEC1 offers the possibility to chain descriptors. It is therefore possible to build a first descriptor pointing to the buffered data and a second descriptor pointing to the incoming data, hence avoiding the memory copy to a single buffer. With this patch, the time necessary for a md5sum on a 90Mbytes file is approximately 3 seconds. Without the patch it takes 6 seconds. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2017-10-06 21:05:06 +08:00
if (ctx->dma_buf)
dma_unmap_single(dev, ctx->dma_buf, sizeof(req_ctx->buf),
DMA_TO_DEVICE);
if (is_sec1)
ctx->dma_buf = dma_map_single(dev, req_ctx->buf,
sizeof(req_ctx->buf),
DMA_TO_DEVICE);
return 0;
}
/*
* on h/w without explicit sha224 support, we initialize h/w context
* manually with sha224 constants, and tell it to run sha256.
*/
static int ahash_init_sha224_swinit(struct ahash_request *areq)
{
struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
struct talitos_ctx *ctx = crypto_ahash_ctx(tfm);
struct device *dev = ctx->dev;
ahash_init(areq);
req_ctx->swinit = 1;/* prevent h/w initting context with sha256 values*/
req_ctx->hw_context[0] = SHA224_H0;
req_ctx->hw_context[1] = SHA224_H1;
req_ctx->hw_context[2] = SHA224_H2;
req_ctx->hw_context[3] = SHA224_H3;
req_ctx->hw_context[4] = SHA224_H4;
req_ctx->hw_context[5] = SHA224_H5;
req_ctx->hw_context[6] = SHA224_H6;
req_ctx->hw_context[7] = SHA224_H7;
/* init 64-bit count */
req_ctx->hw_context[8] = 0;
req_ctx->hw_context[9] = 0;
dma_sync_single_for_device(dev, ctx->dma_hw_context,
req_ctx->hw_context_size, DMA_TO_DEVICE);
return 0;
}
static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes)
{
struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
struct talitos_ctx *ctx = crypto_ahash_ctx(tfm);
struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
struct talitos_edesc *edesc;
unsigned int blocksize =
crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm));
unsigned int nbytes_to_hash;
unsigned int to_hash_later;
unsigned int nsg;
int nents;
crypto: talitos - chain in buffered data for ahash on SEC1 SEC1 doesn't support S/G in descriptors so for hash operations, the CPU has to build a buffer containing the buffered block and the incoming data. This generates a lot of memory copies which represents more than 50% of CPU time of a md5sum operation as shown below with a 'perf record'. |--86.24%-- kcapi_md_digest | | | |--86.18%-- _kcapi_common_vmsplice_chunk_fd | | | | | |--83.68%-- splice | | | | | | | |--83.59%-- ret_from_syscall | | | | | | | | | |--83.52%-- sys_splice | | | | | | | | | | | |--83.49%-- splice_from_pipe | | | | | | | | | | | | | |--83.04%-- __splice_from_pipe | | | | | | | | | | | | | | | |--80.67%-- pipe_to_sendpage | | | | | | | | | | | | | | | | | |--78.25%-- hash_sendpage | | | | | | | | | | | | | | | | | | | |--60.08%-- ahash_process_req | | | | | | | | | | | | | | | | | | | | | |--56.36%-- sg_copy_buffer | | | | | | | | | | | | | | | | | | | | | | | |--55.29%-- memcpy | | | | | | | | | | | | However, unlike SEC2+, SEC1 offers the possibility to chain descriptors. It is therefore possible to build a first descriptor pointing to the buffered data and a second descriptor pointing to the incoming data, hence avoiding the memory copy to a single buffer. With this patch, the time necessary for a md5sum on a 90Mbytes file is approximately 3 seconds. Without the patch it takes 6 seconds. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2017-10-06 21:05:06 +08:00
struct device *dev = ctx->dev;
struct talitos_private *priv = dev_get_drvdata(dev);
bool is_sec1 = has_ftr_sec1(priv);
int offset = 0;
u8 *ctx_buf = req_ctx->buf[req_ctx->buf_idx];
if (!req_ctx->last && (nbytes + req_ctx->nbuf <= blocksize)) {
/* Buffer up to one whole block */
nents = sg_nents_for_len(areq->src, nbytes);
if (nents < 0) {
dev_err(ctx->dev, "Invalid number of src SG.\n");
return nents;
}
sg_copy_to_buffer(areq->src, nents,
ctx_buf + req_ctx->nbuf, nbytes);
req_ctx->nbuf += nbytes;
return 0;
}
/* At least (blocksize + 1) bytes are available to hash */
nbytes_to_hash = nbytes + req_ctx->nbuf;
to_hash_later = nbytes_to_hash & (blocksize - 1);
if (req_ctx->last)
to_hash_later = 0;
else if (to_hash_later)
/* There is a partial block. Hash the full block(s) now */
nbytes_to_hash -= to_hash_later;
else {
/* Keep one block buffered */
nbytes_to_hash -= blocksize;
to_hash_later = blocksize;
}
/* Chain in any previously buffered data */
crypto: talitos - chain in buffered data for ahash on SEC1 SEC1 doesn't support S/G in descriptors so for hash operations, the CPU has to build a buffer containing the buffered block and the incoming data. This generates a lot of memory copies which represents more than 50% of CPU time of a md5sum operation as shown below with a 'perf record'. |--86.24%-- kcapi_md_digest | | | |--86.18%-- _kcapi_common_vmsplice_chunk_fd | | | | | |--83.68%-- splice | | | | | | | |--83.59%-- ret_from_syscall | | | | | | | | | |--83.52%-- sys_splice | | | | | | | | | | | |--83.49%-- splice_from_pipe | | | | | | | | | | | | | |--83.04%-- __splice_from_pipe | | | | | | | | | | | | | | | |--80.67%-- pipe_to_sendpage | | | | | | | | | | | | | | | | | |--78.25%-- hash_sendpage | | | | | | | | | | | | | | | | | | | |--60.08%-- ahash_process_req | | | | | | | | | | | | | | | | | | | | | |--56.36%-- sg_copy_buffer | | | | | | | | | | | | | | | | | | | | | | | |--55.29%-- memcpy | | | | | | | | | | | | However, unlike SEC2+, SEC1 offers the possibility to chain descriptors. It is therefore possible to build a first descriptor pointing to the buffered data and a second descriptor pointing to the incoming data, hence avoiding the memory copy to a single buffer. With this patch, the time necessary for a md5sum on a 90Mbytes file is approximately 3 seconds. Without the patch it takes 6 seconds. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2017-10-06 21:05:06 +08:00
if (!is_sec1 && req_ctx->nbuf) {
nsg = (req_ctx->nbuf < nbytes_to_hash) ? 2 : 1;
sg_init_table(req_ctx->bufsl, nsg);
sg_set_buf(req_ctx->bufsl, ctx_buf, req_ctx->nbuf);
if (nsg > 1)
sg_chain(req_ctx->bufsl, 2, areq->src);
req_ctx->psrc = req_ctx->bufsl;
crypto: talitos - chain in buffered data for ahash on SEC1 SEC1 doesn't support S/G in descriptors so for hash operations, the CPU has to build a buffer containing the buffered block and the incoming data. This generates a lot of memory copies which represents more than 50% of CPU time of a md5sum operation as shown below with a 'perf record'. |--86.24%-- kcapi_md_digest | | | |--86.18%-- _kcapi_common_vmsplice_chunk_fd | | | | | |--83.68%-- splice | | | | | | | |--83.59%-- ret_from_syscall | | | | | | | | | |--83.52%-- sys_splice | | | | | | | | | | | |--83.49%-- splice_from_pipe | | | | | | | | | | | | | |--83.04%-- __splice_from_pipe | | | | | | | | | | | | | | | |--80.67%-- pipe_to_sendpage | | | | | | | | | | | | | | | | | |--78.25%-- hash_sendpage | | | | | | | | | | | | | | | | | | | |--60.08%-- ahash_process_req | | | | | | | | | | | | | | | | | | | | | |--56.36%-- sg_copy_buffer | | | | | | | | | | | | | | | | | | | | | | | |--55.29%-- memcpy | | | | | | | | | | | | However, unlike SEC2+, SEC1 offers the possibility to chain descriptors. It is therefore possible to build a first descriptor pointing to the buffered data and a second descriptor pointing to the incoming data, hence avoiding the memory copy to a single buffer. With this patch, the time necessary for a md5sum on a 90Mbytes file is approximately 3 seconds. Without the patch it takes 6 seconds. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2017-10-06 21:05:06 +08:00
} else if (is_sec1 && req_ctx->nbuf && req_ctx->nbuf < blocksize) {
if (nbytes_to_hash > blocksize)
offset = blocksize - req_ctx->nbuf;
else
offset = nbytes_to_hash - req_ctx->nbuf;
nents = sg_nents_for_len(areq->src, offset);
if (nents < 0) {
dev_err(ctx->dev, "Invalid number of src SG.\n");
return nents;
}
sg_copy_to_buffer(areq->src, nents,
ctx_buf + req_ctx->nbuf, offset);
crypto: talitos - chain in buffered data for ahash on SEC1 SEC1 doesn't support S/G in descriptors so for hash operations, the CPU has to build a buffer containing the buffered block and the incoming data. This generates a lot of memory copies which represents more than 50% of CPU time of a md5sum operation as shown below with a 'perf record'. |--86.24%-- kcapi_md_digest | | | |--86.18%-- _kcapi_common_vmsplice_chunk_fd | | | | | |--83.68%-- splice | | | | | | | |--83.59%-- ret_from_syscall | | | | | | | | | |--83.52%-- sys_splice | | | | | | | | | | | |--83.49%-- splice_from_pipe | | | | | | | | | | | | | |--83.04%-- __splice_from_pipe | | | | | | | | | | | | | | | |--80.67%-- pipe_to_sendpage | | | | | | | | | | | | | | | | | |--78.25%-- hash_sendpage | | | | | | | | | | | | | | | | | | | |--60.08%-- ahash_process_req | | | | | | | | | | | | | | | | | | | | | |--56.36%-- sg_copy_buffer | | | | | | | | | | | | | | | | | | | | | | | |--55.29%-- memcpy | | | | | | | | | | | | However, unlike SEC2+, SEC1 offers the possibility to chain descriptors. It is therefore possible to build a first descriptor pointing to the buffered data and a second descriptor pointing to the incoming data, hence avoiding the memory copy to a single buffer. With this patch, the time necessary for a md5sum on a 90Mbytes file is approximately 3 seconds. Without the patch it takes 6 seconds. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2017-10-06 21:05:06 +08:00
req_ctx->nbuf += offset;
req_ctx->psrc = areq->src;
} else
req_ctx->psrc = areq->src;
if (to_hash_later) {
nents = sg_nents_for_len(areq->src, nbytes);
if (nents < 0) {
dev_err(ctx->dev, "Invalid number of src SG.\n");
return nents;
}
sg_pcopy_to_buffer(areq->src, nents,
req_ctx->buf[(req_ctx->buf_idx + 1) & 1],
to_hash_later,
nbytes - to_hash_later);
}
req_ctx->to_hash_later = to_hash_later;
/* Allocate extended descriptor */
edesc = ahash_edesc_alloc(areq, nbytes_to_hash);
if (IS_ERR(edesc))
return PTR_ERR(edesc);
edesc->desc.hdr = ctx->desc_hdr_template;
/* On last one, request SEC to pad; otherwise continue */
if (req_ctx->last)
edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_PAD;
else
edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_CONT;
/* request SEC to INIT hash. */
if (req_ctx->first && !req_ctx->swinit)
edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_INIT;
if (is_sec1) {
dma_addr_t dma_buf = ctx->dma_buf + req_ctx->buf_idx *
HASH_MAX_BLOCK_SIZE;
dma_sync_single_for_device(dev, dma_buf,
crypto: talitos - chain in buffered data for ahash on SEC1 SEC1 doesn't support S/G in descriptors so for hash operations, the CPU has to build a buffer containing the buffered block and the incoming data. This generates a lot of memory copies which represents more than 50% of CPU time of a md5sum operation as shown below with a 'perf record'. |--86.24%-- kcapi_md_digest | | | |--86.18%-- _kcapi_common_vmsplice_chunk_fd | | | | | |--83.68%-- splice | | | | | | | |--83.59%-- ret_from_syscall | | | | | | | | | |--83.52%-- sys_splice | | | | | | | | | | | |--83.49%-- splice_from_pipe | | | | | | | | | | | | | |--83.04%-- __splice_from_pipe | | | | | | | | | | | | | | | |--80.67%-- pipe_to_sendpage | | | | | | | | | | | | | | | | | |--78.25%-- hash_sendpage | | | | | | | | | | | | | | | | | | | |--60.08%-- ahash_process_req | | | | | | | | | | | | | | | | | | | | | |--56.36%-- sg_copy_buffer | | | | | | | | | | | | | | | | | | | | | | | |--55.29%-- memcpy | | | | | | | | | | | | However, unlike SEC2+, SEC1 offers the possibility to chain descriptors. It is therefore possible to build a first descriptor pointing to the buffered data and a second descriptor pointing to the incoming data, hence avoiding the memory copy to a single buffer. With this patch, the time necessary for a md5sum on a 90Mbytes file is approximately 3 seconds. Without the patch it takes 6 seconds. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2017-10-06 21:05:06 +08:00
req_ctx->nbuf, DMA_TO_DEVICE);
}
/* When the tfm context has a keylen, it's an HMAC.
* A first or last (ie. not middle) descriptor must request HMAC.
*/
if (ctx->keylen && (req_ctx->first || req_ctx->last))
edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_HMAC;
crypto: talitos - chain in buffered data for ahash on SEC1 SEC1 doesn't support S/G in descriptors so for hash operations, the CPU has to build a buffer containing the buffered block and the incoming data. This generates a lot of memory copies which represents more than 50% of CPU time of a md5sum operation as shown below with a 'perf record'. |--86.24%-- kcapi_md_digest | | | |--86.18%-- _kcapi_common_vmsplice_chunk_fd | | | | | |--83.68%-- splice | | | | | | | |--83.59%-- ret_from_syscall | | | | | | | | | |--83.52%-- sys_splice | | | | | | | | | | | |--83.49%-- splice_from_pipe | | | | | | | | | | | | | |--83.04%-- __splice_from_pipe | | | | | | | | | | | | | | | |--80.67%-- pipe_to_sendpage | | | | | | | | | | | | | | | | | |--78.25%-- hash_sendpage | | | | | | | | | | | | | | | | | | | |--60.08%-- ahash_process_req | | | | | | | | | | | | | | | | | | | | | |--56.36%-- sg_copy_buffer | | | | | | | | | | | | | | | | | | | | | | | |--55.29%-- memcpy | | | | | | | | | | | | However, unlike SEC2+, SEC1 offers the possibility to chain descriptors. It is therefore possible to build a first descriptor pointing to the buffered data and a second descriptor pointing to the incoming data, hence avoiding the memory copy to a single buffer. With this patch, the time necessary for a md5sum on a 90Mbytes file is approximately 3 seconds. Without the patch it takes 6 seconds. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2017-10-06 21:05:06 +08:00
return common_nonsnoop_hash(edesc, areq, nbytes_to_hash, offset,
ahash_done);
}
static int ahash_update(struct ahash_request *areq)
{
struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
req_ctx->last = 0;
return ahash_process_req(areq, areq->nbytes);
}
static int ahash_final(struct ahash_request *areq)
{
struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
req_ctx->last = 1;
return ahash_process_req(areq, 0);
}
static int ahash_finup(struct ahash_request *areq)
{
struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
req_ctx->last = 1;
return ahash_process_req(areq, areq->nbytes);
}
static int ahash_digest(struct ahash_request *areq)
{
struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
ahash->init(areq);
req_ctx->last = 1;
return ahash_process_req(areq, areq->nbytes);
}
static int ahash_export(struct ahash_request *areq, void *out)
{
struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
struct talitos_export_state *export = out;
struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
struct talitos_ctx *ctx = crypto_ahash_ctx(ahash);
struct device *dev = ctx->dev;
dma_sync_single_for_cpu(dev, ctx->dma_hw_context,
req_ctx->hw_context_size, DMA_FROM_DEVICE);
memcpy(export->hw_context, req_ctx->hw_context,
req_ctx->hw_context_size);
memcpy(export->buf, req_ctx->buf[req_ctx->buf_idx], req_ctx->nbuf);
export->swinit = req_ctx->swinit;
export->first = req_ctx->first;
export->last = req_ctx->last;
export->to_hash_later = req_ctx->to_hash_later;
export->nbuf = req_ctx->nbuf;
return 0;
}
static int ahash_import(struct ahash_request *areq, const void *in)
{
struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
const struct talitos_export_state *export = in;
unsigned int size;
struct talitos_ctx *ctx = crypto_ahash_ctx(tfm);
struct device *dev = ctx->dev;
crypto: talitos - chain in buffered data for ahash on SEC1 SEC1 doesn't support S/G in descriptors so for hash operations, the CPU has to build a buffer containing the buffered block and the incoming data. This generates a lot of memory copies which represents more than 50% of CPU time of a md5sum operation as shown below with a 'perf record'. |--86.24%-- kcapi_md_digest | | | |--86.18%-- _kcapi_common_vmsplice_chunk_fd | | | | | |--83.68%-- splice | | | | | | | |--83.59%-- ret_from_syscall | | | | | | | | | |--83.52%-- sys_splice | | | | | | | | | | | |--83.49%-- splice_from_pipe | | | | | | | | | | | | | |--83.04%-- __splice_from_pipe | | | | | | | | | | | | | | | |--80.67%-- pipe_to_sendpage | | | | | | | | | | | | | | | | | |--78.25%-- hash_sendpage | | | | | | | | | | | | | | | | | | | |--60.08%-- ahash_process_req | | | | | | | | | | | | | | | | | | | | | |--56.36%-- sg_copy_buffer | | | | | | | | | | | | | | | | | | | | | | | |--55.29%-- memcpy | | | | | | | | | | | | However, unlike SEC2+, SEC1 offers the possibility to chain descriptors. It is therefore possible to build a first descriptor pointing to the buffered data and a second descriptor pointing to the incoming data, hence avoiding the memory copy to a single buffer. With this patch, the time necessary for a md5sum on a 90Mbytes file is approximately 3 seconds. Without the patch it takes 6 seconds. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2017-10-06 21:05:06 +08:00
struct talitos_private *priv = dev_get_drvdata(dev);
bool is_sec1 = has_ftr_sec1(priv);
memset(req_ctx, 0, sizeof(*req_ctx));
size = (crypto_ahash_digestsize(tfm) <= SHA256_DIGEST_SIZE)
? TALITOS_MDEU_CONTEXT_SIZE_MD5_SHA1_SHA256
: TALITOS_MDEU_CONTEXT_SIZE_SHA384_SHA512;
req_ctx->hw_context_size = size;
if (ctx->dma_hw_context)
dma_unmap_single(dev, ctx->dma_hw_context, size,
DMA_BIDIRECTIONAL);
memcpy(req_ctx->hw_context, export->hw_context, size);
ctx->dma_hw_context = dma_map_single(dev, req_ctx->hw_context, size,
DMA_BIDIRECTIONAL);
crypto: talitos - chain in buffered data for ahash on SEC1 SEC1 doesn't support S/G in descriptors so for hash operations, the CPU has to build a buffer containing the buffered block and the incoming data. This generates a lot of memory copies which represents more than 50% of CPU time of a md5sum operation as shown below with a 'perf record'. |--86.24%-- kcapi_md_digest | | | |--86.18%-- _kcapi_common_vmsplice_chunk_fd | | | | | |--83.68%-- splice | | | | | | | |--83.59%-- ret_from_syscall | | | | | | | | | |--83.52%-- sys_splice | | | | | | | | | | | |--83.49%-- splice_from_pipe | | | | | | | | | | | | | |--83.04%-- __splice_from_pipe | | | | | | | | | | | | | | | |--80.67%-- pipe_to_sendpage | | | | | | | | | | | | | | | | | |--78.25%-- hash_sendpage | | | | | | | | | | | | | | | | | | | |--60.08%-- ahash_process_req | | | | | | | | | | | | | | | | | | | | | |--56.36%-- sg_copy_buffer | | | | | | | | | | | | | | | | | | | | | | | |--55.29%-- memcpy | | | | | | | | | | | | However, unlike SEC2+, SEC1 offers the possibility to chain descriptors. It is therefore possible to build a first descriptor pointing to the buffered data and a second descriptor pointing to the incoming data, hence avoiding the memory copy to a single buffer. With this patch, the time necessary for a md5sum on a 90Mbytes file is approximately 3 seconds. Without the patch it takes 6 seconds. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2017-10-06 21:05:06 +08:00
if (ctx->dma_buf)
dma_unmap_single(dev, ctx->dma_buf, sizeof(req_ctx->buf),
DMA_TO_DEVICE);
memcpy(req_ctx->buf[0], export->buf, export->nbuf);
crypto: talitos - chain in buffered data for ahash on SEC1 SEC1 doesn't support S/G in descriptors so for hash operations, the CPU has to build a buffer containing the buffered block and the incoming data. This generates a lot of memory copies which represents more than 50% of CPU time of a md5sum operation as shown below with a 'perf record'. |--86.24%-- kcapi_md_digest | | | |--86.18%-- _kcapi_common_vmsplice_chunk_fd | | | | | |--83.68%-- splice | | | | | | | |--83.59%-- ret_from_syscall | | | | | | | | | |--83.52%-- sys_splice | | | | | | | | | | | |--83.49%-- splice_from_pipe | | | | | | | | | | | | | |--83.04%-- __splice_from_pipe | | | | | | | | | | | | | | | |--80.67%-- pipe_to_sendpage | | | | | | | | | | | | | | | | | |--78.25%-- hash_sendpage | | | | | | | | | | | | | | | | | | | |--60.08%-- ahash_process_req | | | | | | | | | | | | | | | | | | | | | |--56.36%-- sg_copy_buffer | | | | | | | | | | | | | | | | | | | | | | | |--55.29%-- memcpy | | | | | | | | | | | | However, unlike SEC2+, SEC1 offers the possibility to chain descriptors. It is therefore possible to build a first descriptor pointing to the buffered data and a second descriptor pointing to the incoming data, hence avoiding the memory copy to a single buffer. With this patch, the time necessary for a md5sum on a 90Mbytes file is approximately 3 seconds. Without the patch it takes 6 seconds. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2017-10-06 21:05:06 +08:00
if (is_sec1)
ctx->dma_buf = dma_map_single(dev, req_ctx->buf,
sizeof(req_ctx->buf),
DMA_TO_DEVICE);
req_ctx->swinit = export->swinit;
req_ctx->first = export->first;
req_ctx->last = export->last;
req_ctx->to_hash_later = export->to_hash_later;
req_ctx->nbuf = export->nbuf;
return 0;
}
static int keyhash(struct crypto_ahash *tfm, const u8 *key, unsigned int keylen,
u8 *hash)
{
struct talitos_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm));
struct scatterlist sg[1];
struct ahash_request *req;
struct crypto_wait wait;
int ret;
crypto_init_wait(&wait);
req = ahash_request_alloc(tfm, GFP_KERNEL);
if (!req)
return -ENOMEM;
/* Keep tfm keylen == 0 during hash of the long key */
ctx->keylen = 0;
ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
crypto_req_done, &wait);
sg_init_one(&sg[0], key, keylen);
ahash_request_set_crypt(req, sg, hash, keylen);
ret = crypto_wait_req(crypto_ahash_digest(req), &wait);
ahash_request_free(req);
return ret;
}
static int ahash_setkey(struct crypto_ahash *tfm, const u8 *key,
unsigned int keylen)
{
struct talitos_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm));
struct device *dev = ctx->dev;
unsigned int blocksize =
crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm));
unsigned int digestsize = crypto_ahash_digestsize(tfm);
unsigned int keysize = keylen;
u8 hash[SHA512_DIGEST_SIZE];
int ret;
if (keylen <= blocksize)
memcpy(ctx->key, key, keysize);
else {
/* Must get the hash of the long key */
ret = keyhash(tfm, key, keylen, hash);
if (ret) {
crypto_ahash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
keysize = digestsize;
memcpy(ctx->key, hash, digestsize);
}
if (ctx->keylen)
dma_unmap_single(dev, ctx->dma_key, ctx->keylen, DMA_TO_DEVICE);
ctx->keylen = keysize;
ctx->dma_key = dma_map_single(dev, ctx->key, keysize, DMA_TO_DEVICE);
return 0;
}
struct talitos_alg_template {
u32 type;
u32 priority;
union {
struct crypto_alg crypto;
struct ahash_alg hash;
struct aead_alg aead;
} alg;
__be32 desc_hdr_template;
};
static struct talitos_alg_template driver_algs[] = {
/* AEAD algorithms. These use a single-pass ipsec_esp descriptor */
{ .type = CRYPTO_ALG_TYPE_AEAD,
.alg.aead = {
.base = {
.cra_name = "authenc(hmac(sha1),cbc(aes))",
.cra_driver_name = "authenc-hmac-sha1-"
"cbc-aes-talitos",
.cra_blocksize = AES_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
.ivsize = AES_BLOCK_SIZE,
.maxauthsize = SHA1_DIGEST_SIZE,
},
.desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP |
DESC_HDR_SEL0_AESU |
DESC_HDR_MODE0_AESU_CBC |
DESC_HDR_SEL1_MDEUA |
DESC_HDR_MODE1_MDEU_INIT |
DESC_HDR_MODE1_MDEU_PAD |
DESC_HDR_MODE1_MDEU_SHA1_HMAC,
},
{ .type = CRYPTO_ALG_TYPE_AEAD,
.priority = TALITOS_CRA_PRIORITY_AEAD_HSNA,
.alg.aead = {
.base = {
.cra_name = "authenc(hmac(sha1),cbc(aes))",
.cra_driver_name = "authenc-hmac-sha1-"
"cbc-aes-talitos",
.cra_blocksize = AES_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
.ivsize = AES_BLOCK_SIZE,
.maxauthsize = SHA1_DIGEST_SIZE,
},
.desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU |
DESC_HDR_SEL0_AESU |
DESC_HDR_MODE0_AESU_CBC |
DESC_HDR_SEL1_MDEUA |
DESC_HDR_MODE1_MDEU_INIT |
DESC_HDR_MODE1_MDEU_PAD |
DESC_HDR_MODE1_MDEU_SHA1_HMAC,
},
{ .type = CRYPTO_ALG_TYPE_AEAD,
.alg.aead = {
.base = {
.cra_name = "authenc(hmac(sha1),"
"cbc(des3_ede))",
.cra_driver_name = "authenc-hmac-sha1-"
"cbc-3des-talitos",
.cra_blocksize = DES3_EDE_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
.ivsize = DES3_EDE_BLOCK_SIZE,
.maxauthsize = SHA1_DIGEST_SIZE,
},
.desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP |
DESC_HDR_SEL0_DEU |
DESC_HDR_MODE0_DEU_CBC |
DESC_HDR_MODE0_DEU_3DES |
DESC_HDR_SEL1_MDEUA |
DESC_HDR_MODE1_MDEU_INIT |
DESC_HDR_MODE1_MDEU_PAD |
DESC_HDR_MODE1_MDEU_SHA1_HMAC,
},
{ .type = CRYPTO_ALG_TYPE_AEAD,
.priority = TALITOS_CRA_PRIORITY_AEAD_HSNA,
.alg.aead = {
.base = {
.cra_name = "authenc(hmac(sha1),"
"cbc(des3_ede))",
.cra_driver_name = "authenc-hmac-sha1-"
"cbc-3des-talitos",
.cra_blocksize = DES3_EDE_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
.ivsize = DES3_EDE_BLOCK_SIZE,
.maxauthsize = SHA1_DIGEST_SIZE,
},
.desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU |
DESC_HDR_SEL0_DEU |
DESC_HDR_MODE0_DEU_CBC |
DESC_HDR_MODE0_DEU_3DES |
DESC_HDR_SEL1_MDEUA |
DESC_HDR_MODE1_MDEU_INIT |
DESC_HDR_MODE1_MDEU_PAD |
DESC_HDR_MODE1_MDEU_SHA1_HMAC,
},
{ .type = CRYPTO_ALG_TYPE_AEAD,
.alg.aead = {
.base = {
.cra_name = "authenc(hmac(sha224),cbc(aes))",
.cra_driver_name = "authenc-hmac-sha224-"
"cbc-aes-talitos",
.cra_blocksize = AES_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
.ivsize = AES_BLOCK_SIZE,
.maxauthsize = SHA224_DIGEST_SIZE,
},
.desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP |
DESC_HDR_SEL0_AESU |
DESC_HDR_MODE0_AESU_CBC |
DESC_HDR_SEL1_MDEUA |
DESC_HDR_MODE1_MDEU_INIT |
DESC_HDR_MODE1_MDEU_PAD |
DESC_HDR_MODE1_MDEU_SHA224_HMAC,
},
{ .type = CRYPTO_ALG_TYPE_AEAD,
.priority = TALITOS_CRA_PRIORITY_AEAD_HSNA,
.alg.aead = {
.base = {
.cra_name = "authenc(hmac(sha224),cbc(aes))",
.cra_driver_name = "authenc-hmac-sha224-"
"cbc-aes-talitos",
.cra_blocksize = AES_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
.ivsize = AES_BLOCK_SIZE,
.maxauthsize = SHA224_DIGEST_SIZE,
},
.desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU |
DESC_HDR_SEL0_AESU |
DESC_HDR_MODE0_AESU_CBC |
DESC_HDR_SEL1_MDEUA |
DESC_HDR_MODE1_MDEU_INIT |
DESC_HDR_MODE1_MDEU_PAD |
DESC_HDR_MODE1_MDEU_SHA224_HMAC,
},
{ .type = CRYPTO_ALG_TYPE_AEAD,
.alg.aead = {
.base = {
.cra_name = "authenc(hmac(sha224),"
"cbc(des3_ede))",
.cra_driver_name = "authenc-hmac-sha224-"
"cbc-3des-talitos",
.cra_blocksize = DES3_EDE_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
.ivsize = DES3_EDE_BLOCK_SIZE,
.maxauthsize = SHA224_DIGEST_SIZE,
},
.desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP |
DESC_HDR_SEL0_DEU |
DESC_HDR_MODE0_DEU_CBC |
DESC_HDR_MODE0_DEU_3DES |
DESC_HDR_SEL1_MDEUA |
DESC_HDR_MODE1_MDEU_INIT |
DESC_HDR_MODE1_MDEU_PAD |
DESC_HDR_MODE1_MDEU_SHA224_HMAC,
},
{ .type = CRYPTO_ALG_TYPE_AEAD,
.priority = TALITOS_CRA_PRIORITY_AEAD_HSNA,
.alg.aead = {
.base = {
.cra_name = "authenc(hmac(sha224),"
"cbc(des3_ede))",
.cra_driver_name = "authenc-hmac-sha224-"
"cbc-3des-talitos",
.cra_blocksize = DES3_EDE_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
.ivsize = DES3_EDE_BLOCK_SIZE,
.maxauthsize = SHA224_DIGEST_SIZE,
},
.desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU |
DESC_HDR_SEL0_DEU |
DESC_HDR_MODE0_DEU_CBC |
DESC_HDR_MODE0_DEU_3DES |
DESC_HDR_SEL1_MDEUA |
DESC_HDR_MODE1_MDEU_INIT |
DESC_HDR_MODE1_MDEU_PAD |
DESC_HDR_MODE1_MDEU_SHA224_HMAC,
},
{ .type = CRYPTO_ALG_TYPE_AEAD,
.alg.aead = {
.base = {
.cra_name = "authenc(hmac(sha256),cbc(aes))",
.cra_driver_name = "authenc-hmac-sha256-"
"cbc-aes-talitos",
.cra_blocksize = AES_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
.ivsize = AES_BLOCK_SIZE,
.maxauthsize = SHA256_DIGEST_SIZE,
},
.desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP |
DESC_HDR_SEL0_AESU |
DESC_HDR_MODE0_AESU_CBC |
DESC_HDR_SEL1_MDEUA |
DESC_HDR_MODE1_MDEU_INIT |
DESC_HDR_MODE1_MDEU_PAD |
DESC_HDR_MODE1_MDEU_SHA256_HMAC,
},
{ .type = CRYPTO_ALG_TYPE_AEAD,
.priority = TALITOS_CRA_PRIORITY_AEAD_HSNA,
.alg.aead = {
.base = {
.cra_name = "authenc(hmac(sha256),cbc(aes))",
.cra_driver_name = "authenc-hmac-sha256-"
"cbc-aes-talitos",
.cra_blocksize = AES_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
.ivsize = AES_BLOCK_SIZE,
.maxauthsize = SHA256_DIGEST_SIZE,
},
.desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU |
DESC_HDR_SEL0_AESU |
DESC_HDR_MODE0_AESU_CBC |
DESC_HDR_SEL1_MDEUA |
DESC_HDR_MODE1_MDEU_INIT |
DESC_HDR_MODE1_MDEU_PAD |
DESC_HDR_MODE1_MDEU_SHA256_HMAC,
},
{ .type = CRYPTO_ALG_TYPE_AEAD,
.alg.aead = {
.base = {
.cra_name = "authenc(hmac(sha256),"
"cbc(des3_ede))",
.cra_driver_name = "authenc-hmac-sha256-"
"cbc-3des-talitos",
.cra_blocksize = DES3_EDE_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
.ivsize = DES3_EDE_BLOCK_SIZE,
.maxauthsize = SHA256_DIGEST_SIZE,
},
.desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP |
DESC_HDR_SEL0_DEU |
DESC_HDR_MODE0_DEU_CBC |
DESC_HDR_MODE0_DEU_3DES |
DESC_HDR_SEL1_MDEUA |
DESC_HDR_MODE1_MDEU_INIT |
DESC_HDR_MODE1_MDEU_PAD |
DESC_HDR_MODE1_MDEU_SHA256_HMAC,
},
{ .type = CRYPTO_ALG_TYPE_AEAD,
.priority = TALITOS_CRA_PRIORITY_AEAD_HSNA,
.alg.aead = {
.base = {
.cra_name = "authenc(hmac(sha256),"
"cbc(des3_ede))",
.cra_driver_name = "authenc-hmac-sha256-"
"cbc-3des-talitos",
.cra_blocksize = DES3_EDE_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
.ivsize = DES3_EDE_BLOCK_SIZE,
.maxauthsize = SHA256_DIGEST_SIZE,
},
.desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU |
DESC_HDR_SEL0_DEU |
DESC_HDR_MODE0_DEU_CBC |
DESC_HDR_MODE0_DEU_3DES |
DESC_HDR_SEL1_MDEUA |
DESC_HDR_MODE1_MDEU_INIT |
DESC_HDR_MODE1_MDEU_PAD |
DESC_HDR_MODE1_MDEU_SHA256_HMAC,
},
{ .type = CRYPTO_ALG_TYPE_AEAD,
.alg.aead = {
.base = {
.cra_name = "authenc(hmac(sha384),cbc(aes))",
.cra_driver_name = "authenc-hmac-sha384-"
"cbc-aes-talitos",
.cra_blocksize = AES_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
.ivsize = AES_BLOCK_SIZE,
.maxauthsize = SHA384_DIGEST_SIZE,
},
.desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP |
DESC_HDR_SEL0_AESU |
DESC_HDR_MODE0_AESU_CBC |
DESC_HDR_SEL1_MDEUB |
DESC_HDR_MODE1_MDEU_INIT |
DESC_HDR_MODE1_MDEU_PAD |
DESC_HDR_MODE1_MDEUB_SHA384_HMAC,
},
{ .type = CRYPTO_ALG_TYPE_AEAD,
.alg.aead = {
.base = {
.cra_name = "authenc(hmac(sha384),"
"cbc(des3_ede))",
.cra_driver_name = "authenc-hmac-sha384-"
"cbc-3des-talitos",
.cra_blocksize = DES3_EDE_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
.ivsize = DES3_EDE_BLOCK_SIZE,
.maxauthsize = SHA384_DIGEST_SIZE,
},
.desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP |
DESC_HDR_SEL0_DEU |
DESC_HDR_MODE0_DEU_CBC |
DESC_HDR_MODE0_DEU_3DES |
DESC_HDR_SEL1_MDEUB |
DESC_HDR_MODE1_MDEU_INIT |
DESC_HDR_MODE1_MDEU_PAD |
DESC_HDR_MODE1_MDEUB_SHA384_HMAC,
},
{ .type = CRYPTO_ALG_TYPE_AEAD,
.alg.aead = {
.base = {
.cra_name = "authenc(hmac(sha512),cbc(aes))",
.cra_driver_name = "authenc-hmac-sha512-"
"cbc-aes-talitos",
.cra_blocksize = AES_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
.ivsize = AES_BLOCK_SIZE,
.maxauthsize = SHA512_DIGEST_SIZE,
},
.desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP |
DESC_HDR_SEL0_AESU |
DESC_HDR_MODE0_AESU_CBC |
DESC_HDR_SEL1_MDEUB |
DESC_HDR_MODE1_MDEU_INIT |
DESC_HDR_MODE1_MDEU_PAD |
DESC_HDR_MODE1_MDEUB_SHA512_HMAC,
},
{ .type = CRYPTO_ALG_TYPE_AEAD,
.alg.aead = {
.base = {
.cra_name = "authenc(hmac(sha512),"
"cbc(des3_ede))",
.cra_driver_name = "authenc-hmac-sha512-"
"cbc-3des-talitos",
.cra_blocksize = DES3_EDE_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
.ivsize = DES3_EDE_BLOCK_SIZE,
.maxauthsize = SHA512_DIGEST_SIZE,
},
.desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP |
DESC_HDR_SEL0_DEU |
DESC_HDR_MODE0_DEU_CBC |
DESC_HDR_MODE0_DEU_3DES |
DESC_HDR_SEL1_MDEUB |
DESC_HDR_MODE1_MDEU_INIT |
DESC_HDR_MODE1_MDEU_PAD |
DESC_HDR_MODE1_MDEUB_SHA512_HMAC,
},
{ .type = CRYPTO_ALG_TYPE_AEAD,
.alg.aead = {
.base = {
.cra_name = "authenc(hmac(md5),cbc(aes))",
.cra_driver_name = "authenc-hmac-md5-"
"cbc-aes-talitos",
.cra_blocksize = AES_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
.ivsize = AES_BLOCK_SIZE,
.maxauthsize = MD5_DIGEST_SIZE,
},
.desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP |
DESC_HDR_SEL0_AESU |
DESC_HDR_MODE0_AESU_CBC |
DESC_HDR_SEL1_MDEUA |
DESC_HDR_MODE1_MDEU_INIT |
DESC_HDR_MODE1_MDEU_PAD |
DESC_HDR_MODE1_MDEU_MD5_HMAC,
},
{ .type = CRYPTO_ALG_TYPE_AEAD,
.priority = TALITOS_CRA_PRIORITY_AEAD_HSNA,
.alg.aead = {
.base = {
.cra_name = "authenc(hmac(md5),cbc(aes))",
.cra_driver_name = "authenc-hmac-md5-"
"cbc-aes-talitos",
.cra_blocksize = AES_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
.ivsize = AES_BLOCK_SIZE,
.maxauthsize = MD5_DIGEST_SIZE,
},
.desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU |
DESC_HDR_SEL0_AESU |
DESC_HDR_MODE0_AESU_CBC |
DESC_HDR_SEL1_MDEUA |
DESC_HDR_MODE1_MDEU_INIT |
DESC_HDR_MODE1_MDEU_PAD |
DESC_HDR_MODE1_MDEU_MD5_HMAC,
},
{ .type = CRYPTO_ALG_TYPE_AEAD,
.alg.aead = {
.base = {
.cra_name = "authenc(hmac(md5),cbc(des3_ede))",
.cra_driver_name = "authenc-hmac-md5-"
"cbc-3des-talitos",
.cra_blocksize = DES3_EDE_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
.ivsize = DES3_EDE_BLOCK_SIZE,
.maxauthsize = MD5_DIGEST_SIZE,
},
.desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP |
DESC_HDR_SEL0_DEU |
DESC_HDR_MODE0_DEU_CBC |
DESC_HDR_MODE0_DEU_3DES |
DESC_HDR_SEL1_MDEUA |
DESC_HDR_MODE1_MDEU_INIT |
DESC_HDR_MODE1_MDEU_PAD |
DESC_HDR_MODE1_MDEU_MD5_HMAC,
},
{ .type = CRYPTO_ALG_TYPE_AEAD,
.priority = TALITOS_CRA_PRIORITY_AEAD_HSNA,
.alg.aead = {
.base = {
.cra_name = "authenc(hmac(md5),cbc(des3_ede))",
.cra_driver_name = "authenc-hmac-md5-"
"cbc-3des-talitos",
.cra_blocksize = DES3_EDE_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
.ivsize = DES3_EDE_BLOCK_SIZE,
.maxauthsize = MD5_DIGEST_SIZE,
},
.desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU |
DESC_HDR_SEL0_DEU |
DESC_HDR_MODE0_DEU_CBC |
DESC_HDR_MODE0_DEU_3DES |
DESC_HDR_SEL1_MDEUA |
DESC_HDR_MODE1_MDEU_INIT |
DESC_HDR_MODE1_MDEU_PAD |
DESC_HDR_MODE1_MDEU_MD5_HMAC,
},
/* ABLKCIPHER algorithms. */
{ .type = CRYPTO_ALG_TYPE_ABLKCIPHER,
.alg.crypto = {
.cra_name = "ecb(aes)",
.cra_driver_name = "ecb-aes-talitos",
.cra_blocksize = AES_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
CRYPTO_ALG_ASYNC,
.cra_ablkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
}
},
.desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
DESC_HDR_SEL0_AESU,
},
{ .type = CRYPTO_ALG_TYPE_ABLKCIPHER,
.alg.crypto = {
.cra_name = "cbc(aes)",
.cra_driver_name = "cbc-aes-talitos",
.cra_blocksize = AES_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
CRYPTO_ALG_ASYNC,
.cra_ablkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
}
},
.desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
DESC_HDR_SEL0_AESU |
DESC_HDR_MODE0_AESU_CBC,
},
{ .type = CRYPTO_ALG_TYPE_ABLKCIPHER,
.alg.crypto = {
.cra_name = "ctr(aes)",
.cra_driver_name = "ctr-aes-talitos",
.cra_blocksize = AES_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
CRYPTO_ALG_ASYNC,
.cra_ablkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
}
},
.desc_hdr_template = DESC_HDR_TYPE_AESU_CTR_NONSNOOP |
DESC_HDR_SEL0_AESU |
DESC_HDR_MODE0_AESU_CTR,
},
{ .type = CRYPTO_ALG_TYPE_ABLKCIPHER,
.alg.crypto = {
.cra_name = "ecb(des)",
.cra_driver_name = "ecb-des-talitos",
.cra_blocksize = DES_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
CRYPTO_ALG_ASYNC,
.cra_ablkcipher = {
.min_keysize = DES_KEY_SIZE,
.max_keysize = DES_KEY_SIZE,
.ivsize = DES_BLOCK_SIZE,
}
},
.desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
DESC_HDR_SEL0_DEU,
},
{ .type = CRYPTO_ALG_TYPE_ABLKCIPHER,
.alg.crypto = {
.cra_name = "cbc(des)",
.cra_driver_name = "cbc-des-talitos",
.cra_blocksize = DES_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
CRYPTO_ALG_ASYNC,
.cra_ablkcipher = {
.min_keysize = DES_KEY_SIZE,
.max_keysize = DES_KEY_SIZE,
.ivsize = DES_BLOCK_SIZE,
}
},
.desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
DESC_HDR_SEL0_DEU |
DESC_HDR_MODE0_DEU_CBC,
},
{ .type = CRYPTO_ALG_TYPE_ABLKCIPHER,
.alg.crypto = {
.cra_name = "ecb(des3_ede)",
.cra_driver_name = "ecb-3des-talitos",
.cra_blocksize = DES3_EDE_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
CRYPTO_ALG_ASYNC,
.cra_ablkcipher = {
.min_keysize = DES3_EDE_KEY_SIZE,
.max_keysize = DES3_EDE_KEY_SIZE,
.ivsize = DES3_EDE_BLOCK_SIZE,
}
},
.desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
DESC_HDR_SEL0_DEU |
DESC_HDR_MODE0_DEU_3DES,
},
{ .type = CRYPTO_ALG_TYPE_ABLKCIPHER,
.alg.crypto = {
.cra_name = "cbc(des3_ede)",
.cra_driver_name = "cbc-3des-talitos",
.cra_blocksize = DES3_EDE_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
CRYPTO_ALG_ASYNC,
.cra_ablkcipher = {
.min_keysize = DES3_EDE_KEY_SIZE,
.max_keysize = DES3_EDE_KEY_SIZE,
.ivsize = DES3_EDE_BLOCK_SIZE,
}
},
.desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
DESC_HDR_SEL0_DEU |
DESC_HDR_MODE0_DEU_CBC |
DESC_HDR_MODE0_DEU_3DES,
},
/* AHASH algorithms. */
{ .type = CRYPTO_ALG_TYPE_AHASH,
.alg.hash = {
.halg.digestsize = MD5_DIGEST_SIZE,
.halg.statesize = sizeof(struct talitos_export_state),
.halg.base = {
.cra_name = "md5",
.cra_driver_name = "md5-talitos",
.cra_blocksize = MD5_HMAC_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_TYPE_AHASH |
CRYPTO_ALG_ASYNC,
}
},
.desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
DESC_HDR_SEL0_MDEUA |
DESC_HDR_MODE0_MDEU_MD5,
},
{ .type = CRYPTO_ALG_TYPE_AHASH,
.alg.hash = {
.halg.digestsize = SHA1_DIGEST_SIZE,
.halg.statesize = sizeof(struct talitos_export_state),
.halg.base = {
.cra_name = "sha1",
.cra_driver_name = "sha1-talitos",
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_TYPE_AHASH |
CRYPTO_ALG_ASYNC,
}
},
.desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
DESC_HDR_SEL0_MDEUA |
DESC_HDR_MODE0_MDEU_SHA1,
},
{ .type = CRYPTO_ALG_TYPE_AHASH,
.alg.hash = {
.halg.digestsize = SHA224_DIGEST_SIZE,
.halg.statesize = sizeof(struct talitos_export_state),
.halg.base = {
.cra_name = "sha224",
.cra_driver_name = "sha224-talitos",
.cra_blocksize = SHA224_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_TYPE_AHASH |
CRYPTO_ALG_ASYNC,
}
},
.desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
DESC_HDR_SEL0_MDEUA |
DESC_HDR_MODE0_MDEU_SHA224,
},
{ .type = CRYPTO_ALG_TYPE_AHASH,
.alg.hash = {
.halg.digestsize = SHA256_DIGEST_SIZE,
.halg.statesize = sizeof(struct talitos_export_state),
.halg.base = {
.cra_name = "sha256",
.cra_driver_name = "sha256-talitos",
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_TYPE_AHASH |
CRYPTO_ALG_ASYNC,
}
},
.desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
DESC_HDR_SEL0_MDEUA |
DESC_HDR_MODE0_MDEU_SHA256,
},
{ .type = CRYPTO_ALG_TYPE_AHASH,
.alg.hash = {
.halg.digestsize = SHA384_DIGEST_SIZE,
.halg.statesize = sizeof(struct talitos_export_state),
.halg.base = {
.cra_name = "sha384",
.cra_driver_name = "sha384-talitos",
.cra_blocksize = SHA384_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_TYPE_AHASH |
CRYPTO_ALG_ASYNC,
}
},
.desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
DESC_HDR_SEL0_MDEUB |
DESC_HDR_MODE0_MDEUB_SHA384,
},
{ .type = CRYPTO_ALG_TYPE_AHASH,
.alg.hash = {
.halg.digestsize = SHA512_DIGEST_SIZE,
.halg.statesize = sizeof(struct talitos_export_state),
.halg.base = {
.cra_name = "sha512",
.cra_driver_name = "sha512-talitos",
.cra_blocksize = SHA512_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_TYPE_AHASH |
CRYPTO_ALG_ASYNC,
}
},
.desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
DESC_HDR_SEL0_MDEUB |
DESC_HDR_MODE0_MDEUB_SHA512,
},
{ .type = CRYPTO_ALG_TYPE_AHASH,
.alg.hash = {
.halg.digestsize = MD5_DIGEST_SIZE,
.halg.statesize = sizeof(struct talitos_export_state),
.halg.base = {
.cra_name = "hmac(md5)",
.cra_driver_name = "hmac-md5-talitos",
.cra_blocksize = MD5_HMAC_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_TYPE_AHASH |
CRYPTO_ALG_ASYNC,
}
},
.desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
DESC_HDR_SEL0_MDEUA |
DESC_HDR_MODE0_MDEU_MD5,
},
{ .type = CRYPTO_ALG_TYPE_AHASH,
.alg.hash = {
.halg.digestsize = SHA1_DIGEST_SIZE,
.halg.statesize = sizeof(struct talitos_export_state),
.halg.base = {
.cra_name = "hmac(sha1)",
.cra_driver_name = "hmac-sha1-talitos",
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_TYPE_AHASH |
CRYPTO_ALG_ASYNC,
}
},
.desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
DESC_HDR_SEL0_MDEUA |
DESC_HDR_MODE0_MDEU_SHA1,
},
{ .type = CRYPTO_ALG_TYPE_AHASH,
.alg.hash = {
.halg.digestsize = SHA224_DIGEST_SIZE,
.halg.statesize = sizeof(struct talitos_export_state),
.halg.base = {
.cra_name = "hmac(sha224)",
.cra_driver_name = "hmac-sha224-talitos",
.cra_blocksize = SHA224_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_TYPE_AHASH |
CRYPTO_ALG_ASYNC,
}
},
.desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
DESC_HDR_SEL0_MDEUA |
DESC_HDR_MODE0_MDEU_SHA224,
},
{ .type = CRYPTO_ALG_TYPE_AHASH,
.alg.hash = {
.halg.digestsize = SHA256_DIGEST_SIZE,
.halg.statesize = sizeof(struct talitos_export_state),
.halg.base = {
.cra_name = "hmac(sha256)",
.cra_driver_name = "hmac-sha256-talitos",
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_TYPE_AHASH |
CRYPTO_ALG_ASYNC,
}
},
.desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
DESC_HDR_SEL0_MDEUA |
DESC_HDR_MODE0_MDEU_SHA256,
},
{ .type = CRYPTO_ALG_TYPE_AHASH,
.alg.hash = {
.halg.digestsize = SHA384_DIGEST_SIZE,
.halg.statesize = sizeof(struct talitos_export_state),
.halg.base = {
.cra_name = "hmac(sha384)",
.cra_driver_name = "hmac-sha384-talitos",
.cra_blocksize = SHA384_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_TYPE_AHASH |
CRYPTO_ALG_ASYNC,
}
},
.desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
DESC_HDR_SEL0_MDEUB |
DESC_HDR_MODE0_MDEUB_SHA384,
},
{ .type = CRYPTO_ALG_TYPE_AHASH,
.alg.hash = {
.halg.digestsize = SHA512_DIGEST_SIZE,
.halg.statesize = sizeof(struct talitos_export_state),
.halg.base = {
.cra_name = "hmac(sha512)",
.cra_driver_name = "hmac-sha512-talitos",
.cra_blocksize = SHA512_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_TYPE_AHASH |
CRYPTO_ALG_ASYNC,
}
},
.desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
DESC_HDR_SEL0_MDEUB |
DESC_HDR_MODE0_MDEUB_SHA512,
}
};
struct talitos_crypto_alg {
struct list_head entry;
struct device *dev;
struct talitos_alg_template algt;
};
crypto: talitos - fix crash in talitos_cra_init() Conversion of talitos driver to the new AEAD interface hasn't been properly tested. AEAD algorithms crash in talitos_cra_init as follows: [...] [ 1.141095] talitos ffe30000.crypto: hwrng [ 1.145381] Unable to handle kernel paging request for data at address 0x00000058 [ 1.152913] Faulting instruction address: 0xc02accc0 [ 1.157910] Oops: Kernel access of bad area, sig: 11 [#1] [ 1.163315] SMP NR_CPUS=2 P1020 RDB [ 1.166810] Modules linked in: [ 1.169875] CPU: 0 PID: 1007 Comm: cryptomgr_test Not tainted 4.4.6 #1 [ 1.176415] task: db5ec200 ti: db4d6000 task.ti: db4d6000 [ 1.181821] NIP: c02accc0 LR: c02acd18 CTR: c02acd04 [ 1.186793] REGS: db4d7d30 TRAP: 0300 Not tainted (4.4.6) [ 1.192457] MSR: 00029000 <CE,EE,ME> CR: 95009359 XER: e0000000 [ 1.198585] DEAR: 00000058 ESR: 00000000 GPR00: c017bdc0 db4d7de0 db5ec200 df424b48 00000000 00000000 df424bfc db75a600 GPR08: df424b48 00000000 db75a628 db4d6000 00000149 00000000 c0044cac db5acda0 GPR16: 00000000 00000000 00000000 00000000 00000000 00000000 00000400 df424940 GPR24: df424900 00003083 00000400 c0180000 db75a640 c03e9f84 df424b40 df424b48 [ 1.230978] NIP [c02accc0] talitos_cra_init+0x28/0x6c [ 1.236039] LR [c02acd18] talitos_cra_init_aead+0x14/0x28 [ 1.241443] Call Trace: [ 1.243894] [db4d7de0] [c03e9f84] 0xc03e9f84 (unreliable) [ 1.249322] [db4d7df0] [c017bdc0] crypto_create_tfm+0x5c/0xf0 [ 1.255083] [db4d7e10] [c017beec] crypto_alloc_tfm+0x98/0xf8 [ 1.260769] [db4d7e40] [c0186a20] alg_test_aead+0x28/0xc8 [ 1.266181] [db4d7e60] [c0186718] alg_test+0x260/0x2e0 [ 1.271333] [db4d7ee0] [c0183860] cryptomgr_test+0x30/0x54 [ 1.276843] [db4d7ef0] [c0044d80] kthread+0xd4/0xd8 [ 1.281741] [db4d7f40] [c000e4a4] ret_from_kernel_thread+0x5c/0x64 [ 1.287930] Instruction dump: [ 1.290902] 38600000 4e800020 81230028 7c681b78 81490010 38e9ffc0 3929ffe8 554a073e [ 1.298691] 2b8a000a 7d474f9e 812a0008 91230030 <80e90058> 39270060 7c0004ac 7cc04828 Cc: <stable@vger.kernel.org> # 4.3+ Fixes: aeb4c132f33d ("crypto: talitos - Convert to new AEAD interface") Signed-off-by: Jonas Eymann <J.Eymann@gmx.net> Fix typo - replaced parameter of __crypto_ahash_alg(): s/tfm/alg Remove checkpatch warnings. Add commit message. Signed-off-by: Horia Geant? <horia.geanta@nxp.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2016-04-20 01:33:47 +08:00
static int talitos_init_common(struct talitos_ctx *ctx,
struct talitos_crypto_alg *talitos_alg)
{
struct talitos_private *priv;
/* update context with ptr to dev */
ctx->dev = talitos_alg->dev;
/* assign SEC channel to tfm in round-robin fashion */
priv = dev_get_drvdata(ctx->dev);
ctx->ch = atomic_inc_return(&priv->last_chan) &
(priv->num_channels - 1);
/* copy descriptor header template value */
ctx->desc_hdr_template = talitos_alg->algt.desc_hdr_template;
/* select done notification */
ctx->desc_hdr_template |= DESC_HDR_DONE_NOTIFY;
return 0;
}
crypto: talitos - fix crash in talitos_cra_init() Conversion of talitos driver to the new AEAD interface hasn't been properly tested. AEAD algorithms crash in talitos_cra_init as follows: [...] [ 1.141095] talitos ffe30000.crypto: hwrng [ 1.145381] Unable to handle kernel paging request for data at address 0x00000058 [ 1.152913] Faulting instruction address: 0xc02accc0 [ 1.157910] Oops: Kernel access of bad area, sig: 11 [#1] [ 1.163315] SMP NR_CPUS=2 P1020 RDB [ 1.166810] Modules linked in: [ 1.169875] CPU: 0 PID: 1007 Comm: cryptomgr_test Not tainted 4.4.6 #1 [ 1.176415] task: db5ec200 ti: db4d6000 task.ti: db4d6000 [ 1.181821] NIP: c02accc0 LR: c02acd18 CTR: c02acd04 [ 1.186793] REGS: db4d7d30 TRAP: 0300 Not tainted (4.4.6) [ 1.192457] MSR: 00029000 <CE,EE,ME> CR: 95009359 XER: e0000000 [ 1.198585] DEAR: 00000058 ESR: 00000000 GPR00: c017bdc0 db4d7de0 db5ec200 df424b48 00000000 00000000 df424bfc db75a600 GPR08: df424b48 00000000 db75a628 db4d6000 00000149 00000000 c0044cac db5acda0 GPR16: 00000000 00000000 00000000 00000000 00000000 00000000 00000400 df424940 GPR24: df424900 00003083 00000400 c0180000 db75a640 c03e9f84 df424b40 df424b48 [ 1.230978] NIP [c02accc0] talitos_cra_init+0x28/0x6c [ 1.236039] LR [c02acd18] talitos_cra_init_aead+0x14/0x28 [ 1.241443] Call Trace: [ 1.243894] [db4d7de0] [c03e9f84] 0xc03e9f84 (unreliable) [ 1.249322] [db4d7df0] [c017bdc0] crypto_create_tfm+0x5c/0xf0 [ 1.255083] [db4d7e10] [c017beec] crypto_alloc_tfm+0x98/0xf8 [ 1.260769] [db4d7e40] [c0186a20] alg_test_aead+0x28/0xc8 [ 1.266181] [db4d7e60] [c0186718] alg_test+0x260/0x2e0 [ 1.271333] [db4d7ee0] [c0183860] cryptomgr_test+0x30/0x54 [ 1.276843] [db4d7ef0] [c0044d80] kthread+0xd4/0xd8 [ 1.281741] [db4d7f40] [c000e4a4] ret_from_kernel_thread+0x5c/0x64 [ 1.287930] Instruction dump: [ 1.290902] 38600000 4e800020 81230028 7c681b78 81490010 38e9ffc0 3929ffe8 554a073e [ 1.298691] 2b8a000a 7d474f9e 812a0008 91230030 <80e90058> 39270060 7c0004ac 7cc04828 Cc: <stable@vger.kernel.org> # 4.3+ Fixes: aeb4c132f33d ("crypto: talitos - Convert to new AEAD interface") Signed-off-by: Jonas Eymann <J.Eymann@gmx.net> Fix typo - replaced parameter of __crypto_ahash_alg(): s/tfm/alg Remove checkpatch warnings. Add commit message. Signed-off-by: Horia Geant? <horia.geanta@nxp.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2016-04-20 01:33:47 +08:00
static int talitos_cra_init(struct crypto_tfm *tfm)
{
struct crypto_alg *alg = tfm->__crt_alg;
struct talitos_crypto_alg *talitos_alg;
struct talitos_ctx *ctx = crypto_tfm_ctx(tfm);
if ((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) == CRYPTO_ALG_TYPE_AHASH)
talitos_alg = container_of(__crypto_ahash_alg(alg),
struct talitos_crypto_alg,
algt.alg.hash);
else
talitos_alg = container_of(alg, struct talitos_crypto_alg,
algt.alg.crypto);
return talitos_init_common(ctx, talitos_alg);
}
static int talitos_cra_init_aead(struct crypto_aead *tfm)
{
crypto: talitos - fix crash in talitos_cra_init() Conversion of talitos driver to the new AEAD interface hasn't been properly tested. AEAD algorithms crash in talitos_cra_init as follows: [...] [ 1.141095] talitos ffe30000.crypto: hwrng [ 1.145381] Unable to handle kernel paging request for data at address 0x00000058 [ 1.152913] Faulting instruction address: 0xc02accc0 [ 1.157910] Oops: Kernel access of bad area, sig: 11 [#1] [ 1.163315] SMP NR_CPUS=2 P1020 RDB [ 1.166810] Modules linked in: [ 1.169875] CPU: 0 PID: 1007 Comm: cryptomgr_test Not tainted 4.4.6 #1 [ 1.176415] task: db5ec200 ti: db4d6000 task.ti: db4d6000 [ 1.181821] NIP: c02accc0 LR: c02acd18 CTR: c02acd04 [ 1.186793] REGS: db4d7d30 TRAP: 0300 Not tainted (4.4.6) [ 1.192457] MSR: 00029000 <CE,EE,ME> CR: 95009359 XER: e0000000 [ 1.198585] DEAR: 00000058 ESR: 00000000 GPR00: c017bdc0 db4d7de0 db5ec200 df424b48 00000000 00000000 df424bfc db75a600 GPR08: df424b48 00000000 db75a628 db4d6000 00000149 00000000 c0044cac db5acda0 GPR16: 00000000 00000000 00000000 00000000 00000000 00000000 00000400 df424940 GPR24: df424900 00003083 00000400 c0180000 db75a640 c03e9f84 df424b40 df424b48 [ 1.230978] NIP [c02accc0] talitos_cra_init+0x28/0x6c [ 1.236039] LR [c02acd18] talitos_cra_init_aead+0x14/0x28 [ 1.241443] Call Trace: [ 1.243894] [db4d7de0] [c03e9f84] 0xc03e9f84 (unreliable) [ 1.249322] [db4d7df0] [c017bdc0] crypto_create_tfm+0x5c/0xf0 [ 1.255083] [db4d7e10] [c017beec] crypto_alloc_tfm+0x98/0xf8 [ 1.260769] [db4d7e40] [c0186a20] alg_test_aead+0x28/0xc8 [ 1.266181] [db4d7e60] [c0186718] alg_test+0x260/0x2e0 [ 1.271333] [db4d7ee0] [c0183860] cryptomgr_test+0x30/0x54 [ 1.276843] [db4d7ef0] [c0044d80] kthread+0xd4/0xd8 [ 1.281741] [db4d7f40] [c000e4a4] ret_from_kernel_thread+0x5c/0x64 [ 1.287930] Instruction dump: [ 1.290902] 38600000 4e800020 81230028 7c681b78 81490010 38e9ffc0 3929ffe8 554a073e [ 1.298691] 2b8a000a 7d474f9e 812a0008 91230030 <80e90058> 39270060 7c0004ac 7cc04828 Cc: <stable@vger.kernel.org> # 4.3+ Fixes: aeb4c132f33d ("crypto: talitos - Convert to new AEAD interface") Signed-off-by: Jonas Eymann <J.Eymann@gmx.net> Fix typo - replaced parameter of __crypto_ahash_alg(): s/tfm/alg Remove checkpatch warnings. Add commit message. Signed-off-by: Horia Geant? <horia.geanta@nxp.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2016-04-20 01:33:47 +08:00
struct aead_alg *alg = crypto_aead_alg(tfm);
struct talitos_crypto_alg *talitos_alg;
struct talitos_ctx *ctx = crypto_aead_ctx(tfm);
talitos_alg = container_of(alg, struct talitos_crypto_alg,
algt.alg.aead);
return talitos_init_common(ctx, talitos_alg);
}
static int talitos_cra_init_ahash(struct crypto_tfm *tfm)
{
struct talitos_ctx *ctx = crypto_tfm_ctx(tfm);
talitos_cra_init(tfm);
ctx->keylen = 0;
crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
sizeof(struct talitos_ahash_req_ctx));
return 0;
}
static void talitos_cra_exit(struct crypto_tfm *tfm)
{
struct talitos_ctx *ctx = crypto_tfm_ctx(tfm);
struct device *dev = ctx->dev;
if (ctx->keylen)
dma_unmap_single(dev, ctx->dma_key, ctx->keylen, DMA_TO_DEVICE);
}
static void talitos_cra_exit_ahash(struct crypto_tfm *tfm)
{
struct talitos_ctx *ctx = crypto_tfm_ctx(tfm);
struct device *dev = ctx->dev;
unsigned int size;
talitos_cra_exit(tfm);
size = (crypto_ahash_digestsize(__crypto_ahash_cast(tfm)) <=
SHA256_DIGEST_SIZE)
? TALITOS_MDEU_CONTEXT_SIZE_MD5_SHA1_SHA256
: TALITOS_MDEU_CONTEXT_SIZE_SHA384_SHA512;
if (ctx->dma_hw_context)
dma_unmap_single(dev, ctx->dma_hw_context, size,
DMA_BIDIRECTIONAL);
crypto: talitos - chain in buffered data for ahash on SEC1 SEC1 doesn't support S/G in descriptors so for hash operations, the CPU has to build a buffer containing the buffered block and the incoming data. This generates a lot of memory copies which represents more than 50% of CPU time of a md5sum operation as shown below with a 'perf record'. |--86.24%-- kcapi_md_digest | | | |--86.18%-- _kcapi_common_vmsplice_chunk_fd | | | | | |--83.68%-- splice | | | | | | | |--83.59%-- ret_from_syscall | | | | | | | | | |--83.52%-- sys_splice | | | | | | | | | | | |--83.49%-- splice_from_pipe | | | | | | | | | | | | | |--83.04%-- __splice_from_pipe | | | | | | | | | | | | | | | |--80.67%-- pipe_to_sendpage | | | | | | | | | | | | | | | | | |--78.25%-- hash_sendpage | | | | | | | | | | | | | | | | | | | |--60.08%-- ahash_process_req | | | | | | | | | | | | | | | | | | | | | |--56.36%-- sg_copy_buffer | | | | | | | | | | | | | | | | | | | | | | | |--55.29%-- memcpy | | | | | | | | | | | | However, unlike SEC2+, SEC1 offers the possibility to chain descriptors. It is therefore possible to build a first descriptor pointing to the buffered data and a second descriptor pointing to the incoming data, hence avoiding the memory copy to a single buffer. With this patch, the time necessary for a md5sum on a 90Mbytes file is approximately 3 seconds. Without the patch it takes 6 seconds. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2017-10-06 21:05:06 +08:00
if (ctx->dma_buf)
dma_unmap_single(dev, ctx->dma_buf, HASH_MAX_BLOCK_SIZE * 2,
crypto: talitos - chain in buffered data for ahash on SEC1 SEC1 doesn't support S/G in descriptors so for hash operations, the CPU has to build a buffer containing the buffered block and the incoming data. This generates a lot of memory copies which represents more than 50% of CPU time of a md5sum operation as shown below with a 'perf record'. |--86.24%-- kcapi_md_digest | | | |--86.18%-- _kcapi_common_vmsplice_chunk_fd | | | | | |--83.68%-- splice | | | | | | | |--83.59%-- ret_from_syscall | | | | | | | | | |--83.52%-- sys_splice | | | | | | | | | | | |--83.49%-- splice_from_pipe | | | | | | | | | | | | | |--83.04%-- __splice_from_pipe | | | | | | | | | | | | | | | |--80.67%-- pipe_to_sendpage | | | | | | | | | | | | | | | | | |--78.25%-- hash_sendpage | | | | | | | | | | | | | | | | | | | |--60.08%-- ahash_process_req | | | | | | | | | | | | | | | | | | | | | |--56.36%-- sg_copy_buffer | | | | | | | | | | | | | | | | | | | | | | | |--55.29%-- memcpy | | | | | | | | | | | | However, unlike SEC2+, SEC1 offers the possibility to chain descriptors. It is therefore possible to build a first descriptor pointing to the buffered data and a second descriptor pointing to the incoming data, hence avoiding the memory copy to a single buffer. With this patch, the time necessary for a md5sum on a 90Mbytes file is approximately 3 seconds. Without the patch it takes 6 seconds. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2017-10-06 21:05:06 +08:00
DMA_TO_DEVICE);
}
/*
* given the alg's descriptor header template, determine whether descriptor
* type and primary/secondary execution units required match the hw
* capabilities description provided in the device tree node.
*/
static int hw_supports(struct device *dev, __be32 desc_hdr_template)
{
struct talitos_private *priv = dev_get_drvdata(dev);
int ret;
ret = (1 << DESC_TYPE(desc_hdr_template) & priv->desc_types) &&
(1 << PRIMARY_EU(desc_hdr_template) & priv->exec_units);
if (SECONDARY_EU(desc_hdr_template))
ret = ret && (1 << SECONDARY_EU(desc_hdr_template)
& priv->exec_units);
return ret;
}
static int talitos_remove(struct platform_device *ofdev)
{
struct device *dev = &ofdev->dev;
struct talitos_private *priv = dev_get_drvdata(dev);
struct talitos_crypto_alg *t_alg, *n;
int i;
list_for_each_entry_safe(t_alg, n, &priv->alg_list, entry) {
switch (t_alg->algt.type) {
case CRYPTO_ALG_TYPE_ABLKCIPHER:
break;
case CRYPTO_ALG_TYPE_AEAD:
crypto_unregister_aead(&t_alg->algt.alg.aead);
case CRYPTO_ALG_TYPE_AHASH:
crypto_unregister_ahash(&t_alg->algt.alg.hash);
break;
}
list_del(&t_alg->entry);
}
if (hw_supports(dev, DESC_HDR_SEL0_RNG))
talitos_unregister_rng(dev);
for (i = 0; i < 2; i++)
if (priv->irq[i]) {
free_irq(priv->irq[i], dev);
irq_dispose_mapping(priv->irq[i]);
}
tasklet_kill(&priv->done_task[0]);
if (priv->irq[1])
tasklet_kill(&priv->done_task[1]);
return 0;
}
static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev,
struct talitos_alg_template
*template)
{
struct talitos_private *priv = dev_get_drvdata(dev);
struct talitos_crypto_alg *t_alg;
struct crypto_alg *alg;
t_alg = devm_kzalloc(dev, sizeof(struct talitos_crypto_alg),
GFP_KERNEL);
if (!t_alg)
return ERR_PTR(-ENOMEM);
t_alg->algt = *template;
switch (t_alg->algt.type) {
case CRYPTO_ALG_TYPE_ABLKCIPHER:
alg = &t_alg->algt.alg.crypto;
alg->cra_init = talitos_cra_init;
alg->cra_exit = talitos_cra_exit;
alg->cra_type = &crypto_ablkcipher_type;
alg->cra_ablkcipher.setkey = ablkcipher_setkey;
alg->cra_ablkcipher.encrypt = ablkcipher_encrypt;
alg->cra_ablkcipher.decrypt = ablkcipher_decrypt;
alg->cra_ablkcipher.geniv = "eseqiv";
break;
case CRYPTO_ALG_TYPE_AEAD:
alg = &t_alg->algt.alg.aead.base;
alg->cra_exit = talitos_cra_exit;
t_alg->algt.alg.aead.init = talitos_cra_init_aead;
t_alg->algt.alg.aead.setkey = aead_setkey;
t_alg->algt.alg.aead.encrypt = aead_encrypt;
t_alg->algt.alg.aead.decrypt = aead_decrypt;
if (!(priv->features & TALITOS_FTR_SHA224_HWINIT) &&
!strncmp(alg->cra_name, "authenc(hmac(sha224)", 20)) {
devm_kfree(dev, t_alg);
return ERR_PTR(-ENOTSUPP);
}
break;
case CRYPTO_ALG_TYPE_AHASH:
alg = &t_alg->algt.alg.hash.halg.base;
alg->cra_init = talitos_cra_init_ahash;
alg->cra_exit = talitos_cra_exit_ahash;
alg->cra_type = &crypto_ahash_type;
t_alg->algt.alg.hash.init = ahash_init;
t_alg->algt.alg.hash.update = ahash_update;
t_alg->algt.alg.hash.final = ahash_final;
t_alg->algt.alg.hash.finup = ahash_finup;
t_alg->algt.alg.hash.digest = ahash_digest;
if (!strncmp(alg->cra_name, "hmac", 4))
t_alg->algt.alg.hash.setkey = ahash_setkey;
t_alg->algt.alg.hash.import = ahash_import;
t_alg->algt.alg.hash.export = ahash_export;
if (!(priv->features & TALITOS_FTR_HMAC_OK) &&
!strncmp(alg->cra_name, "hmac", 4)) {
devm_kfree(dev, t_alg);
return ERR_PTR(-ENOTSUPP);
}
if (!(priv->features & TALITOS_FTR_SHA224_HWINIT) &&
(!strcmp(alg->cra_name, "sha224") ||
!strcmp(alg->cra_name, "hmac(sha224)"))) {
t_alg->algt.alg.hash.init = ahash_init_sha224_swinit;
t_alg->algt.desc_hdr_template =
DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
DESC_HDR_SEL0_MDEUA |
DESC_HDR_MODE0_MDEU_SHA256;
}
break;
default:
dev_err(dev, "unknown algorithm type %d\n", t_alg->algt.type);
devm_kfree(dev, t_alg);
return ERR_PTR(-EINVAL);
}
alg->cra_module = THIS_MODULE;
if (t_alg->algt.priority)
alg->cra_priority = t_alg->algt.priority;
else
alg->cra_priority = TALITOS_CRA_PRIORITY;
alg->cra_alignmask = 0;
alg->cra_ctxsize = sizeof(struct talitos_ctx);
alg->cra_flags |= CRYPTO_ALG_KERN_DRIVER_ONLY;
t_alg->dev = dev;
return t_alg;
}
static int talitos_probe_irq(struct platform_device *ofdev)
{
struct device *dev = &ofdev->dev;
struct device_node *np = ofdev->dev.of_node;
struct talitos_private *priv = dev_get_drvdata(dev);
int err;
bool is_sec1 = has_ftr_sec1(priv);
priv->irq[0] = irq_of_parse_and_map(np, 0);
if (!priv->irq[0]) {
dev_err(dev, "failed to map irq\n");
return -EINVAL;
}
if (is_sec1) {
err = request_irq(priv->irq[0], talitos1_interrupt_4ch, 0,
dev_driver_string(dev), dev);
goto primary_out;
}
priv->irq[1] = irq_of_parse_and_map(np, 1);
/* get the primary irq line */
if (!priv->irq[1]) {
err = request_irq(priv->irq[0], talitos2_interrupt_4ch, 0,
dev_driver_string(dev), dev);
goto primary_out;
}
err = request_irq(priv->irq[0], talitos2_interrupt_ch0_2, 0,
dev_driver_string(dev), dev);
if (err)
goto primary_out;
/* get the secondary irq line */
err = request_irq(priv->irq[1], talitos2_interrupt_ch1_3, 0,
dev_driver_string(dev), dev);
if (err) {
dev_err(dev, "failed to request secondary irq\n");
irq_dispose_mapping(priv->irq[1]);
priv->irq[1] = 0;
}
return err;
primary_out:
if (err) {
dev_err(dev, "failed to request primary irq\n");
irq_dispose_mapping(priv->irq[0]);
priv->irq[0] = 0;
}
return err;
}
static int talitos_probe(struct platform_device *ofdev)
{
struct device *dev = &ofdev->dev;
struct device_node *np = ofdev->dev.of_node;
struct talitos_private *priv;
int i, err;
int stride;
struct resource *res;
priv = devm_kzalloc(dev, sizeof(struct talitos_private), GFP_KERNEL);
if (!priv)
return -ENOMEM;
crypto: talitos: init the priv->alg_list more earlier in talitos_probe() In function talitos_probe(), it will jump to err_out when getting an error in talitos_probe_irq(). Then the uninitialized list head priv->alg_list will be used in function talitos_remove(). In this case we would get a call trace like the following. So move up the initialization of priv->alg_list. Unable to handle kernel paging request for data at address 0x00000000 Faulting instruction address: 0xc0459ff4 Oops: Kernel access of bad area, sig: 11 [#1] SMP NR_CPUS=8 P1020 RDB Modules linked in: CPU: 1 PID: 1 Comm: swapper/0 Tainted: G W 3.13.0-08789-g54c0a4b46150 #33 task: cf050000 ti: cf04c000 task.ti: cf04c000 NIP: c0459ff4 LR: c0459fd4 CTR: c02f2438 REGS: cf04dcb0 TRAP: 0300 Tainted: G W (3.13.0-08789-g54c0a4b46150) MSR: 00029000 <CE,EE,ME> CR: 82000028 XER: 20000000 DEAR: 00000000 ESR: 00000000 GPR00: c045ac28 cf04dd60 cf050000 cf2579c0 00021000 00000000 c02f35b0 0000014e GPR08: c07e702c cf104300 c07e702c 0000014e 22000024 00000000 c0002a3c 00000000 GPR16: 00000000 00000000 00000000 00000000 00000000 00000000 c082e4e0 000000df GPR24: 00000000 00100100 00200200 cf257a2c cf0efe10 cf2579c0 cf0efe10 00000000 NIP [c0459ff4] talitos_remove+0x3c/0x1c8 LR [c0459fd4] talitos_remove+0x1c/0x1c8 Call Trace: [cf04dd60] [c07485d8] __func__.13331+0x1241c8/0x1391c0 (unreliable) [cf04dd90] [c045ac28] talitos_probe+0x244/0x998 [cf04dde0] [c0306a74] platform_drv_probe+0x28/0x68 [cf04ddf0] [c0304d38] really_probe+0x78/0x250 [cf04de10] [c030505c] __driver_attach+0xc8/0xcc [cf04de30] [c0302e98] bus_for_each_dev+0x6c/0xb8 [cf04de60] [c03043cc] bus_add_driver+0x168/0x220 [cf04de80] [c0305798] driver_register+0x88/0x130 [cf04de90] [c0002458] do_one_initcall+0x14c/0x198 [cf04df00] [c079f904] kernel_init_freeable+0x138/0x1d4 [cf04df30] [c0002a50] kernel_init+0x14/0x124 [cf04df40] [c000ec40] ret_from_kernel_thread+0x5c/0x64 Signed-off-by: Kevin Hao <haokexin@gmail.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2014-01-28 20:17:23 +08:00
INIT_LIST_HEAD(&priv->alg_list);
dev_set_drvdata(dev, priv);
priv->ofdev = ofdev;
spin_lock_init(&priv->reg_lock);
res = platform_get_resource(ofdev, IORESOURCE_MEM, 0);
if (!res)
return -ENXIO;
priv->reg = devm_ioremap(dev, res->start, resource_size(res));
if (!priv->reg) {
dev_err(dev, "failed to of_iomap\n");
err = -ENOMEM;
goto err_out;
}
/* get SEC version capabilities from device tree */
of_property_read_u32(np, "fsl,num-channels", &priv->num_channels);
of_property_read_u32(np, "fsl,channel-fifo-len", &priv->chfifo_len);
of_property_read_u32(np, "fsl,exec-units-mask", &priv->exec_units);
of_property_read_u32(np, "fsl,descriptor-types-mask",
&priv->desc_types);
if (!is_power_of_2(priv->num_channels) || !priv->chfifo_len ||
!priv->exec_units || !priv->desc_types) {
dev_err(dev, "invalid property data in device tree node\n");
err = -EINVAL;
goto err_out;
}
if (of_device_is_compatible(np, "fsl,sec3.0"))
priv->features |= TALITOS_FTR_SRC_LINK_TBL_LEN_INCLUDES_EXTENT;
if (of_device_is_compatible(np, "fsl,sec2.1"))
priv->features |= TALITOS_FTR_HW_AUTH_CHECK |
TALITOS_FTR_SHA224_HWINIT |
TALITOS_FTR_HMAC_OK;
if (of_device_is_compatible(np, "fsl,sec1.0"))
priv->features |= TALITOS_FTR_SEC1;
if (of_device_is_compatible(np, "fsl,sec1.2")) {
priv->reg_deu = priv->reg + TALITOS12_DEU;
priv->reg_aesu = priv->reg + TALITOS12_AESU;
priv->reg_mdeu = priv->reg + TALITOS12_MDEU;
stride = TALITOS1_CH_STRIDE;
} else if (of_device_is_compatible(np, "fsl,sec1.0")) {
priv->reg_deu = priv->reg + TALITOS10_DEU;
priv->reg_aesu = priv->reg + TALITOS10_AESU;
priv->reg_mdeu = priv->reg + TALITOS10_MDEU;
priv->reg_afeu = priv->reg + TALITOS10_AFEU;
priv->reg_rngu = priv->reg + TALITOS10_RNGU;
priv->reg_pkeu = priv->reg + TALITOS10_PKEU;
stride = TALITOS1_CH_STRIDE;
} else {
priv->reg_deu = priv->reg + TALITOS2_DEU;
priv->reg_aesu = priv->reg + TALITOS2_AESU;
priv->reg_mdeu = priv->reg + TALITOS2_MDEU;
priv->reg_afeu = priv->reg + TALITOS2_AFEU;
priv->reg_rngu = priv->reg + TALITOS2_RNGU;
priv->reg_pkeu = priv->reg + TALITOS2_PKEU;
priv->reg_keu = priv->reg + TALITOS2_KEU;
priv->reg_crcu = priv->reg + TALITOS2_CRCU;
stride = TALITOS2_CH_STRIDE;
}
err = talitos_probe_irq(ofdev);
if (err)
goto err_out;
if (of_device_is_compatible(np, "fsl,sec1.0")) {
if (priv->num_channels == 1)
tasklet_init(&priv->done_task[0], talitos1_done_ch0,
(unsigned long)dev);
else
tasklet_init(&priv->done_task[0], talitos1_done_4ch,
(unsigned long)dev);
} else {
if (priv->irq[1]) {
tasklet_init(&priv->done_task[0], talitos2_done_ch0_2,
(unsigned long)dev);
tasklet_init(&priv->done_task[1], talitos2_done_ch1_3,
(unsigned long)dev);
} else if (priv->num_channels == 1) {
tasklet_init(&priv->done_task[0], talitos2_done_ch0,
(unsigned long)dev);
} else {
tasklet_init(&priv->done_task[0], talitos2_done_4ch,
(unsigned long)dev);
}
}
priv->chan = devm_kzalloc(dev, sizeof(struct talitos_channel) *
priv->num_channels, GFP_KERNEL);
if (!priv->chan) {
dev_err(dev, "failed to allocate channel management space\n");
err = -ENOMEM;
goto err_out;
}
priv->fifo_len = roundup_pow_of_two(priv->chfifo_len);
for (i = 0; i < priv->num_channels; i++) {
priv->chan[i].reg = priv->reg + stride * (i + 1);
if (!priv->irq[1] || !(i & 1))
priv->chan[i].reg += TALITOS_CH_BASE_OFFSET;
spin_lock_init(&priv->chan[i].head_lock);
spin_lock_init(&priv->chan[i].tail_lock);
priv->chan[i].fifo = devm_kzalloc(dev,
sizeof(struct talitos_request) *
priv->fifo_len, GFP_KERNEL);
if (!priv->chan[i].fifo) {
dev_err(dev, "failed to allocate request fifo %d\n", i);
err = -ENOMEM;
goto err_out;
}
atomic_set(&priv->chan[i].submit_count,
-(priv->chfifo_len - 1));
}
dma_set_mask(dev, DMA_BIT_MASK(36));
/* reset and initialize the h/w */
err = init_device(dev);
if (err) {
dev_err(dev, "failed to initialize device\n");
goto err_out;
}
/* register the RNG, if available */
if (hw_supports(dev, DESC_HDR_SEL0_RNG)) {
err = talitos_register_rng(dev);
if (err) {
dev_err(dev, "failed to register hwrng: %d\n", err);
goto err_out;
} else
dev_info(dev, "hwrng\n");
}
/* register crypto algorithms the device supports */
for (i = 0; i < ARRAY_SIZE(driver_algs); i++) {
if (hw_supports(dev, driver_algs[i].desc_hdr_template)) {
struct talitos_crypto_alg *t_alg;
struct crypto_alg *alg = NULL;
t_alg = talitos_alg_alloc(dev, &driver_algs[i]);
if (IS_ERR(t_alg)) {
err = PTR_ERR(t_alg);
if (err == -ENOTSUPP)
continue;
goto err_out;
}
switch (t_alg->algt.type) {
case CRYPTO_ALG_TYPE_ABLKCIPHER:
err = crypto_register_alg(
&t_alg->algt.alg.crypto);
alg = &t_alg->algt.alg.crypto;
break;
case CRYPTO_ALG_TYPE_AEAD:
err = crypto_register_aead(
&t_alg->algt.alg.aead);
alg = &t_alg->algt.alg.aead.base;
break;
case CRYPTO_ALG_TYPE_AHASH:
err = crypto_register_ahash(
&t_alg->algt.alg.hash);
alg = &t_alg->algt.alg.hash.halg.base;
break;
}
if (err) {
dev_err(dev, "%s alg registration failed\n",
alg->cra_driver_name);
devm_kfree(dev, t_alg);
} else
list_add_tail(&t_alg->entry, &priv->alg_list);
}
}
if (!list_empty(&priv->alg_list))
dev_info(dev, "%s algorithms registered in /proc/crypto\n",
(char *)of_get_property(np, "compatible", NULL));
return 0;
err_out:
talitos_remove(ofdev);
return err;
}
static const struct of_device_id talitos_match[] = {
#ifdef CONFIG_CRYPTO_DEV_TALITOS1
{
.compatible = "fsl,sec1.0",
},
#endif
#ifdef CONFIG_CRYPTO_DEV_TALITOS2
{
.compatible = "fsl,sec2.0",
},
#endif
{},
};
MODULE_DEVICE_TABLE(of, talitos_match);
static struct platform_driver talitos_driver = {
.driver = {
.name = "talitos",
.of_match_table = talitos_match,
},
.probe = talitos_probe,
.remove = talitos_remove,
};
module_platform_driver(talitos_driver);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Kim Phillips <kim.phillips@freescale.com>");
MODULE_DESCRIPTION("Freescale integrated security engine (SEC) driver");