1063 lines
27 KiB
C
1063 lines
27 KiB
C
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
/*
|
|
* Driver for IBM PowerNV 842 compression accelerator
|
|
*
|
|
* Copyright (C) 2015 Dan Streetman, IBM Corp
|
|
*/
|
|
|
|
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
|
|
|
#include "nx-842.h"
|
|
|
|
#include <linux/timer.h>
|
|
|
|
#include <asm/prom.h>
|
|
#include <asm/icswx.h>
|
|
#include <asm/vas.h>
|
|
#include <asm/reg.h>
|
|
#include <asm/opal-api.h>
|
|
#include <asm/opal.h>
|
|
|
|
MODULE_LICENSE("GPL");
|
|
MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
|
|
MODULE_DESCRIPTION("842 H/W Compression driver for IBM PowerNV processors");
|
|
MODULE_ALIAS_CRYPTO("842");
|
|
MODULE_ALIAS_CRYPTO("842-nx");
|
|
|
|
#define WORKMEM_ALIGN (CRB_ALIGN)
|
|
#define CSB_WAIT_MAX (5000) /* ms */
|
|
#define VAS_RETRIES (10)
|
|
|
|
struct nx842_workmem {
|
|
/* Below fields must be properly aligned */
|
|
struct coprocessor_request_block crb; /* CRB_ALIGN align */
|
|
struct data_descriptor_entry ddl_in[DDL_LEN_MAX]; /* DDE_ALIGN align */
|
|
struct data_descriptor_entry ddl_out[DDL_LEN_MAX]; /* DDE_ALIGN align */
|
|
/* Above fields must be properly aligned */
|
|
|
|
ktime_t start;
|
|
|
|
char padding[WORKMEM_ALIGN]; /* unused, to allow alignment */
|
|
} __packed __aligned(WORKMEM_ALIGN);
|
|
|
|
struct nx842_coproc {
|
|
unsigned int chip_id;
|
|
unsigned int ct;
|
|
unsigned int ci; /* Coprocessor instance, used with icswx */
|
|
struct {
|
|
struct vas_window *rxwin;
|
|
int id;
|
|
} vas;
|
|
struct list_head list;
|
|
};
|
|
|
|
/*
|
|
* Send the request to NX engine on the chip for the corresponding CPU
|
|
* where the process is executing. Use with VAS function.
|
|
*/
|
|
static DEFINE_PER_CPU(struct vas_window *, cpu_txwin);
|
|
|
|
/* no cpu hotplug on powernv, so this list never changes after init */
|
|
static LIST_HEAD(nx842_coprocs);
|
|
static unsigned int nx842_ct; /* used in icswx function */
|
|
|
|
static int (*nx842_powernv_exec)(const unsigned char *in,
|
|
unsigned int inlen, unsigned char *out,
|
|
unsigned int *outlenp, void *workmem, int fc);
|
|
|
|
/**
|
|
* setup_indirect_dde - Setup an indirect DDE
|
|
*
|
|
* The DDE is setup with the the DDE count, byte count, and address of
|
|
* first direct DDE in the list.
|
|
*/
|
|
static void setup_indirect_dde(struct data_descriptor_entry *dde,
|
|
struct data_descriptor_entry *ddl,
|
|
unsigned int dde_count, unsigned int byte_count)
|
|
{
|
|
dde->flags = 0;
|
|
dde->count = dde_count;
|
|
dde->index = 0;
|
|
dde->length = cpu_to_be32(byte_count);
|
|
dde->address = cpu_to_be64(nx842_get_pa(ddl));
|
|
}
|
|
|
|
/**
|
|
* setup_direct_dde - Setup single DDE from buffer
|
|
*
|
|
* The DDE is setup with the buffer and length. The buffer must be properly
|
|
* aligned. The used length is returned.
|
|
* Returns:
|
|
* N Successfully set up DDE with N bytes
|
|
*/
|
|
static unsigned int setup_direct_dde(struct data_descriptor_entry *dde,
|
|
unsigned long pa, unsigned int len)
|
|
{
|
|
unsigned int l = min_t(unsigned int, len, LEN_ON_PAGE(pa));
|
|
|
|
dde->flags = 0;
|
|
dde->count = 0;
|
|
dde->index = 0;
|
|
dde->length = cpu_to_be32(l);
|
|
dde->address = cpu_to_be64(pa);
|
|
|
|
return l;
|
|
}
|
|
|
|
/**
|
|
* setup_ddl - Setup DDL from buffer
|
|
*
|
|
* Returns:
|
|
* 0 Successfully set up DDL
|
|
*/
|
|
static int setup_ddl(struct data_descriptor_entry *dde,
|
|
struct data_descriptor_entry *ddl,
|
|
unsigned char *buf, unsigned int len,
|
|
bool in)
|
|
{
|
|
unsigned long pa = nx842_get_pa(buf);
|
|
int i, ret, total_len = len;
|
|
|
|
if (!IS_ALIGNED(pa, DDE_BUFFER_ALIGN)) {
|
|
pr_debug("%s buffer pa 0x%lx not 0x%x-byte aligned\n",
|
|
in ? "input" : "output", pa, DDE_BUFFER_ALIGN);
|
|
return -EINVAL;
|
|
}
|
|
|
|
/* only need to check last mult; since buffer must be
|
|
* DDE_BUFFER_ALIGN aligned, and that is a multiple of
|
|
* DDE_BUFFER_SIZE_MULT, and pre-last page DDE buffers
|
|
* are guaranteed a multiple of DDE_BUFFER_SIZE_MULT.
|
|
*/
|
|
if (len % DDE_BUFFER_LAST_MULT) {
|
|
pr_debug("%s buffer len 0x%x not a multiple of 0x%x\n",
|
|
in ? "input" : "output", len, DDE_BUFFER_LAST_MULT);
|
|
if (in)
|
|
return -EINVAL;
|
|
len = round_down(len, DDE_BUFFER_LAST_MULT);
|
|
}
|
|
|
|
/* use a single direct DDE */
|
|
if (len <= LEN_ON_PAGE(pa)) {
|
|
ret = setup_direct_dde(dde, pa, len);
|
|
WARN_ON(ret < len);
|
|
return 0;
|
|
}
|
|
|
|
/* use the DDL */
|
|
for (i = 0; i < DDL_LEN_MAX && len > 0; i++) {
|
|
ret = setup_direct_dde(&ddl[i], pa, len);
|
|
buf += ret;
|
|
len -= ret;
|
|
pa = nx842_get_pa(buf);
|
|
}
|
|
|
|
if (len > 0) {
|
|
pr_debug("0x%x total %s bytes 0x%x too many for DDL.\n",
|
|
total_len, in ? "input" : "output", len);
|
|
if (in)
|
|
return -EMSGSIZE;
|
|
total_len -= len;
|
|
}
|
|
setup_indirect_dde(dde, ddl, i, total_len);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Log an error together with the raw CSB fields: flags, cs, cc, ce and
 * the (CPU-endian) count.  @msg must be a string literal.
 */
#define CSB_ERR(csb, msg, ...)					\
	pr_err("ERROR: " msg " : %02x %02x %02x %02x %08x\n",	\
	       ##__VA_ARGS__, (csb)->flags,			\
	       (csb)->cs, (csb)->cc, (csb)->ce,			\
	       be32_to_cpu((csb)->count))

/* Same as CSB_ERR() but also prints the address stored in the CSB. */
#define CSB_ERR_ADDR(csb, msg, ...)				\
	CSB_ERR(csb, msg " at %lx", ##__VA_ARGS__,		\
		(unsigned long)be64_to_cpu((csb)->address))
|
|
|
|
/**
|
|
* wait_for_csb
|
|
*/
|
|
static int wait_for_csb(struct nx842_workmem *wmem,
|
|
struct coprocessor_status_block *csb)
|
|
{
|
|
ktime_t start = wmem->start, now = ktime_get();
|
|
ktime_t timeout = ktime_add_ms(start, CSB_WAIT_MAX);
|
|
|
|
while (!(READ_ONCE(csb->flags) & CSB_V)) {
|
|
cpu_relax();
|
|
now = ktime_get();
|
|
if (ktime_after(now, timeout))
|
|
break;
|
|
}
|
|
|
|
/* hw has updated csb and output buffer */
|
|
barrier();
|
|
|
|
/* check CSB flags */
|
|
if (!(csb->flags & CSB_V)) {
|
|
CSB_ERR(csb, "CSB still not valid after %ld us, giving up",
|
|
(long)ktime_us_delta(now, start));
|
|
return -ETIMEDOUT;
|
|
}
|
|
if (csb->flags & CSB_F) {
|
|
CSB_ERR(csb, "Invalid CSB format");
|
|
return -EPROTO;
|
|
}
|
|
if (csb->flags & CSB_CH) {
|
|
CSB_ERR(csb, "Invalid CSB chaining state");
|
|
return -EPROTO;
|
|
}
|
|
|
|
/* verify CSB completion sequence is 0 */
|
|
if (csb->cs) {
|
|
CSB_ERR(csb, "Invalid CSB completion sequence");
|
|
return -EPROTO;
|
|
}
|
|
|
|
/* check CSB Completion Code */
|
|
switch (csb->cc) {
|
|
/* no error */
|
|
case CSB_CC_SUCCESS:
|
|
break;
|
|
case CSB_CC_TPBC_GT_SPBC:
|
|
/* not an error, but the compressed data is
|
|
* larger than the uncompressed data :(
|
|
*/
|
|
break;
|
|
|
|
/* input data errors */
|
|
case CSB_CC_OPERAND_OVERLAP:
|
|
/* input and output buffers overlap */
|
|
CSB_ERR(csb, "Operand Overlap error");
|
|
return -EINVAL;
|
|
case CSB_CC_INVALID_OPERAND:
|
|
CSB_ERR(csb, "Invalid operand");
|
|
return -EINVAL;
|
|
case CSB_CC_NOSPC:
|
|
/* output buffer too small */
|
|
return -ENOSPC;
|
|
case CSB_CC_ABORT:
|
|
CSB_ERR(csb, "Function aborted");
|
|
return -EINTR;
|
|
case CSB_CC_CRC_MISMATCH:
|
|
CSB_ERR(csb, "CRC mismatch");
|
|
return -EINVAL;
|
|
case CSB_CC_TEMPL_INVALID:
|
|
CSB_ERR(csb, "Compressed data template invalid");
|
|
return -EINVAL;
|
|
case CSB_CC_TEMPL_OVERFLOW:
|
|
CSB_ERR(csb, "Compressed data template shows data past end");
|
|
return -EINVAL;
|
|
case CSB_CC_EXCEED_BYTE_COUNT: /* P9 or later */
|
|
/*
|
|
* DDE byte count exceeds the limit specified in Maximum
|
|
* byte count register.
|
|
*/
|
|
CSB_ERR(csb, "DDE byte count exceeds the limit");
|
|
return -EINVAL;
|
|
|
|
/* these should not happen */
|
|
case CSB_CC_INVALID_ALIGN:
|
|
/* setup_ddl should have detected this */
|
|
CSB_ERR_ADDR(csb, "Invalid alignment");
|
|
return -EINVAL;
|
|
case CSB_CC_DATA_LENGTH:
|
|
/* setup_ddl should have detected this */
|
|
CSB_ERR(csb, "Invalid data length");
|
|
return -EINVAL;
|
|
case CSB_CC_WR_TRANSLATION:
|
|
case CSB_CC_TRANSLATION:
|
|
case CSB_CC_TRANSLATION_DUP1:
|
|
case CSB_CC_TRANSLATION_DUP2:
|
|
case CSB_CC_TRANSLATION_DUP3:
|
|
case CSB_CC_TRANSLATION_DUP4:
|
|
case CSB_CC_TRANSLATION_DUP5:
|
|
case CSB_CC_TRANSLATION_DUP6:
|
|
/* should not happen, we use physical addrs */
|
|
CSB_ERR_ADDR(csb, "Translation error");
|
|
return -EPROTO;
|
|
case CSB_CC_WR_PROTECTION:
|
|
case CSB_CC_PROTECTION:
|
|
case CSB_CC_PROTECTION_DUP1:
|
|
case CSB_CC_PROTECTION_DUP2:
|
|
case CSB_CC_PROTECTION_DUP3:
|
|
case CSB_CC_PROTECTION_DUP4:
|
|
case CSB_CC_PROTECTION_DUP5:
|
|
case CSB_CC_PROTECTION_DUP6:
|
|
/* should not happen, we use physical addrs */
|
|
CSB_ERR_ADDR(csb, "Protection error");
|
|
return -EPROTO;
|
|
case CSB_CC_PRIVILEGE:
|
|
/* shouldn't happen, we're in HYP mode */
|
|
CSB_ERR(csb, "Insufficient Privilege error");
|
|
return -EPROTO;
|
|
case CSB_CC_EXCESSIVE_DDE:
|
|
/* shouldn't happen, setup_ddl doesn't use many dde's */
|
|
CSB_ERR(csb, "Too many DDEs in DDL");
|
|
return -EINVAL;
|
|
case CSB_CC_TRANSPORT:
|
|
case CSB_CC_INVALID_CRB: /* P9 or later */
|
|
/* shouldn't happen, we setup CRB correctly */
|
|
CSB_ERR(csb, "Invalid CRB");
|
|
return -EINVAL;
|
|
case CSB_CC_INVALID_DDE: /* P9 or later */
|
|
/*
|
|
* shouldn't happen, setup_direct/indirect_dde creates
|
|
* DDE right
|
|
*/
|
|
CSB_ERR(csb, "Invalid DDE");
|
|
return -EINVAL;
|
|
case CSB_CC_SEGMENTED_DDL:
|
|
/* shouldn't happen, setup_ddl creates DDL right */
|
|
CSB_ERR(csb, "Segmented DDL error");
|
|
return -EINVAL;
|
|
case CSB_CC_DDE_OVERFLOW:
|
|
/* shouldn't happen, setup_ddl creates DDL right */
|
|
CSB_ERR(csb, "DDE overflow error");
|
|
return -EINVAL;
|
|
case CSB_CC_SESSION:
|
|
/* should not happen with ICSWX */
|
|
CSB_ERR(csb, "Session violation error");
|
|
return -EPROTO;
|
|
case CSB_CC_CHAIN:
|
|
/* should not happen, we don't use chained CRBs */
|
|
CSB_ERR(csb, "Chained CRB error");
|
|
return -EPROTO;
|
|
case CSB_CC_SEQUENCE:
|
|
/* should not happen, we don't use chained CRBs */
|
|
CSB_ERR(csb, "CRB sequence number error");
|
|
return -EPROTO;
|
|
case CSB_CC_UNKNOWN_CODE:
|
|
CSB_ERR(csb, "Unknown subfunction code");
|
|
return -EPROTO;
|
|
|
|
/* hardware errors */
|
|
case CSB_CC_RD_EXTERNAL:
|
|
case CSB_CC_RD_EXTERNAL_DUP1:
|
|
case CSB_CC_RD_EXTERNAL_DUP2:
|
|
case CSB_CC_RD_EXTERNAL_DUP3:
|
|
CSB_ERR_ADDR(csb, "Read error outside coprocessor");
|
|
return -EPROTO;
|
|
case CSB_CC_WR_EXTERNAL:
|
|
CSB_ERR_ADDR(csb, "Write error outside coprocessor");
|
|
return -EPROTO;
|
|
case CSB_CC_INTERNAL:
|
|
CSB_ERR(csb, "Internal error in coprocessor");
|
|
return -EPROTO;
|
|
case CSB_CC_PROVISION:
|
|
CSB_ERR(csb, "Storage provision error");
|
|
return -EPROTO;
|
|
case CSB_CC_HW:
|
|
CSB_ERR(csb, "Correctable hardware error");
|
|
return -EPROTO;
|
|
case CSB_CC_HW_EXPIRED_TIMER: /* P9 or later */
|
|
CSB_ERR(csb, "Job did not finish within allowed time");
|
|
return -EPROTO;
|
|
|
|
default:
|
|
CSB_ERR(csb, "Invalid CC %d", csb->cc);
|
|
return -EPROTO;
|
|
}
|
|
|
|
/* check Completion Extension state */
|
|
if (csb->ce & CSB_CE_TERMINATION) {
|
|
CSB_ERR(csb, "CSB request was terminated");
|
|
return -EPROTO;
|
|
}
|
|
if (csb->ce & CSB_CE_INCOMPLETE) {
|
|
CSB_ERR(csb, "CSB request not complete");
|
|
return -EPROTO;
|
|
}
|
|
if (!(csb->ce & CSB_CE_TPBC)) {
|
|
CSB_ERR(csb, "TPBC not provided, unknown target length");
|
|
return -EPROTO;
|
|
}
|
|
|
|
/* successful completion */
|
|
pr_debug_ratelimited("Processed %u bytes in %lu us\n",
|
|
be32_to_cpu(csb->count),
|
|
(unsigned long)ktime_us_delta(now, start));
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int nx842_config_crb(const unsigned char *in, unsigned int inlen,
|
|
unsigned char *out, unsigned int outlen,
|
|
struct nx842_workmem *wmem)
|
|
{
|
|
struct coprocessor_request_block *crb;
|
|
struct coprocessor_status_block *csb;
|
|
u64 csb_addr;
|
|
int ret;
|
|
|
|
crb = &wmem->crb;
|
|
csb = &crb->csb;
|
|
|
|
/* Clear any previous values */
|
|
memset(crb, 0, sizeof(*crb));
|
|
|
|
/* set up DDLs */
|
|
ret = setup_ddl(&crb->source, wmem->ddl_in,
|
|
(unsigned char *)in, inlen, true);
|
|
if (ret)
|
|
return ret;
|
|
|
|
ret = setup_ddl(&crb->target, wmem->ddl_out,
|
|
out, outlen, false);
|
|
if (ret)
|
|
return ret;
|
|
|
|
/* set up CRB's CSB addr */
|
|
csb_addr = nx842_get_pa(csb) & CRB_CSB_ADDRESS;
|
|
csb_addr |= CRB_CSB_AT; /* Addrs are phys */
|
|
crb->csb_addr = cpu_to_be64(csb_addr);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* nx842_exec_icswx - compress/decompress data using the 842 algorithm
|
|
*
|
|
* (De)compression provided by the NX842 coprocessor on IBM PowerNV systems.
|
|
* This compresses or decompresses the provided input buffer into the provided
|
|
* output buffer.
|
|
*
|
|
* Upon return from this function @outlen contains the length of the
|
|
* output data. If there is an error then @outlen will be 0 and an
|
|
* error will be specified by the return code from this function.
|
|
*
|
|
* The @workmem buffer should only be used by one function call at a time.
|
|
*
|
|
* @in: input buffer pointer
|
|
* @inlen: input buffer size
|
|
* @out: output buffer pointer
|
|
* @outlenp: output buffer size pointer
|
|
* @workmem: working memory buffer pointer, size determined by
|
|
* nx842_powernv_driver.workmem_size
|
|
* @fc: function code, see CCW Function Codes in nx-842.h
|
|
*
|
|
* Returns:
|
|
* 0 Success, output of length @outlenp stored in the buffer at @out
|
|
* -ENODEV Hardware unavailable
|
|
* -ENOSPC Output buffer is to small
|
|
* -EMSGSIZE Input buffer too large
|
|
* -EINVAL buffer constraints do not fix nx842_constraints
|
|
* -EPROTO hardware error during operation
|
|
* -ETIMEDOUT hardware did not complete operation in reasonable time
|
|
* -EINTR operation was aborted
|
|
*/
|
|
static int nx842_exec_icswx(const unsigned char *in, unsigned int inlen,
|
|
unsigned char *out, unsigned int *outlenp,
|
|
void *workmem, int fc)
|
|
{
|
|
struct coprocessor_request_block *crb;
|
|
struct coprocessor_status_block *csb;
|
|
struct nx842_workmem *wmem;
|
|
int ret;
|
|
u32 ccw;
|
|
unsigned int outlen = *outlenp;
|
|
|
|
wmem = PTR_ALIGN(workmem, WORKMEM_ALIGN);
|
|
|
|
*outlenp = 0;
|
|
|
|
/* shoudn't happen, we don't load without a coproc */
|
|
if (!nx842_ct) {
|
|
pr_err_ratelimited("coprocessor CT is 0");
|
|
return -ENODEV;
|
|
}
|
|
|
|
ret = nx842_config_crb(in, inlen, out, outlen, wmem);
|
|
if (ret)
|
|
return ret;
|
|
|
|
crb = &wmem->crb;
|
|
csb = &crb->csb;
|
|
|
|
/* set up CCW */
|
|
ccw = 0;
|
|
ccw = SET_FIELD(CCW_CT, ccw, nx842_ct);
|
|
ccw = SET_FIELD(CCW_CI_842, ccw, 0); /* use 0 for hw auto-selection */
|
|
ccw = SET_FIELD(CCW_FC_842, ccw, fc);
|
|
|
|
wmem->start = ktime_get();
|
|
|
|
/* do ICSWX */
|
|
ret = icswx(cpu_to_be32(ccw), crb);
|
|
|
|
pr_debug_ratelimited("icswx CR %x ccw %x crb->ccw %x\n", ret,
|
|
(unsigned int)ccw,
|
|
(unsigned int)be32_to_cpu(crb->ccw));
|
|
|
|
/*
|
|
* NX842 coprocessor sets 3rd bit in CR register with XER[S0].
|
|
* XER[S0] is the integer summary overflow bit which is nothing
|
|
* to do NX. Since this bit can be set with other return values,
|
|
* mask this bit.
|
|
*/
|
|
ret &= ~ICSWX_XERS0;
|
|
|
|
switch (ret) {
|
|
case ICSWX_INITIATED:
|
|
ret = wait_for_csb(wmem, csb);
|
|
break;
|
|
case ICSWX_BUSY:
|
|
pr_debug_ratelimited("842 Coprocessor busy\n");
|
|
ret = -EBUSY;
|
|
break;
|
|
case ICSWX_REJECTED:
|
|
pr_err_ratelimited("ICSWX rejected\n");
|
|
ret = -EPROTO;
|
|
break;
|
|
}
|
|
|
|
if (!ret)
|
|
*outlenp = be32_to_cpu(csb->count);
|
|
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* nx842_exec_vas - compress/decompress data using the 842 algorithm
|
|
*
|
|
* (De)compression provided by the NX842 coprocessor on IBM PowerNV systems.
|
|
* This compresses or decompresses the provided input buffer into the provided
|
|
* output buffer.
|
|
*
|
|
* Upon return from this function @outlen contains the length of the
|
|
* output data. If there is an error then @outlen will be 0 and an
|
|
* error will be specified by the return code from this function.
|
|
*
|
|
* The @workmem buffer should only be used by one function call at a time.
|
|
*
|
|
* @in: input buffer pointer
|
|
* @inlen: input buffer size
|
|
* @out: output buffer pointer
|
|
* @outlenp: output buffer size pointer
|
|
* @workmem: working memory buffer pointer, size determined by
|
|
* nx842_powernv_driver.workmem_size
|
|
* @fc: function code, see CCW Function Codes in nx-842.h
|
|
*
|
|
* Returns:
|
|
* 0 Success, output of length @outlenp stored in the buffer
|
|
* at @out
|
|
* -ENODEV Hardware unavailable
|
|
* -ENOSPC Output buffer is to small
|
|
* -EMSGSIZE Input buffer too large
|
|
* -EINVAL buffer constraints do not fix nx842_constraints
|
|
* -EPROTO hardware error during operation
|
|
* -ETIMEDOUT hardware did not complete operation in reasonable time
|
|
* -EINTR operation was aborted
|
|
*/
|
|
static int nx842_exec_vas(const unsigned char *in, unsigned int inlen,
|
|
unsigned char *out, unsigned int *outlenp,
|
|
void *workmem, int fc)
|
|
{
|
|
struct coprocessor_request_block *crb;
|
|
struct coprocessor_status_block *csb;
|
|
struct nx842_workmem *wmem;
|
|
struct vas_window *txwin;
|
|
int ret, i = 0;
|
|
u32 ccw;
|
|
unsigned int outlen = *outlenp;
|
|
|
|
wmem = PTR_ALIGN(workmem, WORKMEM_ALIGN);
|
|
|
|
*outlenp = 0;
|
|
|
|
crb = &wmem->crb;
|
|
csb = &crb->csb;
|
|
|
|
ret = nx842_config_crb(in, inlen, out, outlen, wmem);
|
|
if (ret)
|
|
return ret;
|
|
|
|
ccw = 0;
|
|
ccw = SET_FIELD(CCW_FC_842, ccw, fc);
|
|
crb->ccw = cpu_to_be32(ccw);
|
|
|
|
do {
|
|
wmem->start = ktime_get();
|
|
preempt_disable();
|
|
txwin = this_cpu_read(cpu_txwin);
|
|
|
|
/*
|
|
* VAS copy CRB into L2 cache. Refer <asm/vas.h>.
|
|
* @crb and @offset.
|
|
*/
|
|
vas_copy_crb(crb, 0);
|
|
|
|
/*
|
|
* VAS paste previously copied CRB to NX.
|
|
* @txwin, @offset and @last (must be true).
|
|
*/
|
|
ret = vas_paste_crb(txwin, 0, 1);
|
|
preempt_enable();
|
|
/*
|
|
* Retry copy/paste function for VAS failures.
|
|
*/
|
|
} while (ret && (i++ < VAS_RETRIES));
|
|
|
|
if (ret) {
|
|
pr_err_ratelimited("VAS copy/paste failed\n");
|
|
return ret;
|
|
}
|
|
|
|
ret = wait_for_csb(wmem, csb);
|
|
if (!ret)
|
|
*outlenp = be32_to_cpu(csb->count);
|
|
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* nx842_powernv_compress - Compress data using the 842 algorithm
|
|
*
|
|
* Compression provided by the NX842 coprocessor on IBM PowerNV systems.
|
|
* The input buffer is compressed and the result is stored in the
|
|
* provided output buffer.
|
|
*
|
|
* Upon return from this function @outlen contains the length of the
|
|
* compressed data. If there is an error then @outlen will be 0 and an
|
|
* error will be specified by the return code from this function.
|
|
*
|
|
* @in: input buffer pointer
|
|
* @inlen: input buffer size
|
|
* @out: output buffer pointer
|
|
* @outlenp: output buffer size pointer
|
|
* @workmem: working memory buffer pointer, size determined by
|
|
* nx842_powernv_driver.workmem_size
|
|
*
|
|
* Returns: see @nx842_powernv_exec()
|
|
*/
|
|
static int nx842_powernv_compress(const unsigned char *in, unsigned int inlen,
|
|
unsigned char *out, unsigned int *outlenp,
|
|
void *wmem)
|
|
{
|
|
return nx842_powernv_exec(in, inlen, out, outlenp,
|
|
wmem, CCW_FC_842_COMP_CRC);
|
|
}
|
|
|
|
/**
|
|
* nx842_powernv_decompress - Decompress data using the 842 algorithm
|
|
*
|
|
* Decompression provided by the NX842 coprocessor on IBM PowerNV systems.
|
|
* The input buffer is decompressed and the result is stored in the
|
|
* provided output buffer.
|
|
*
|
|
* Upon return from this function @outlen contains the length of the
|
|
* decompressed data. If there is an error then @outlen will be 0 and an
|
|
* error will be specified by the return code from this function.
|
|
*
|
|
* @in: input buffer pointer
|
|
* @inlen: input buffer size
|
|
* @out: output buffer pointer
|
|
* @outlenp: output buffer size pointer
|
|
* @workmem: working memory buffer pointer, size determined by
|
|
* nx842_powernv_driver.workmem_size
|
|
*
|
|
* Returns: see @nx842_powernv_exec()
|
|
*/
|
|
static int nx842_powernv_decompress(const unsigned char *in, unsigned int inlen,
|
|
unsigned char *out, unsigned int *outlenp,
|
|
void *wmem)
|
|
{
|
|
return nx842_powernv_exec(in, inlen, out, outlenp,
|
|
wmem, CCW_FC_842_DECOMP_CRC);
|
|
}
|
|
|
|
static inline void nx842_add_coprocs_list(struct nx842_coproc *coproc,
|
|
int chipid)
|
|
{
|
|
coproc->chip_id = chipid;
|
|
INIT_LIST_HEAD(&coproc->list);
|
|
list_add(&coproc->list, &nx842_coprocs);
|
|
}
|
|
|
|
static struct vas_window *nx842_alloc_txwin(struct nx842_coproc *coproc)
|
|
{
|
|
struct vas_window *txwin = NULL;
|
|
struct vas_tx_win_attr txattr;
|
|
|
|
/*
|
|
* Kernel requests will be high priority. So open send
|
|
* windows only for high priority RxFIFO entries.
|
|
*/
|
|
vas_init_tx_win_attr(&txattr, coproc->ct);
|
|
txattr.lpid = 0; /* lpid is 0 for kernel requests */
|
|
txattr.pid = 0; /* pid is 0 for kernel requests */
|
|
|
|
/*
|
|
* Open a VAS send window which is used to send request to NX.
|
|
*/
|
|
txwin = vas_tx_win_open(coproc->vas.id, coproc->ct, &txattr);
|
|
if (IS_ERR(txwin))
|
|
pr_err("ibm,nx-842: Can not open TX window: %ld\n",
|
|
PTR_ERR(txwin));
|
|
|
|
return txwin;
|
|
}
|
|
|
|
/*
|
|
* Identify chip ID for each CPU, open send wndow for the corresponding NX
|
|
* engine and save txwin in percpu cpu_txwin.
|
|
* cpu_txwin is used in copy/paste operation for each compression /
|
|
* decompression request.
|
|
*/
|
|
static int nx842_open_percpu_txwins(void)
|
|
{
|
|
struct nx842_coproc *coproc, *n;
|
|
unsigned int i, chip_id;
|
|
|
|
for_each_possible_cpu(i) {
|
|
struct vas_window *txwin = NULL;
|
|
|
|
chip_id = cpu_to_chip_id(i);
|
|
|
|
list_for_each_entry_safe(coproc, n, &nx842_coprocs, list) {
|
|
/*
|
|
* Kernel requests use only high priority FIFOs. So
|
|
* open send windows for these FIFOs.
|
|
*/
|
|
|
|
if (coproc->ct != VAS_COP_TYPE_842_HIPRI)
|
|
continue;
|
|
|
|
if (coproc->chip_id == chip_id) {
|
|
txwin = nx842_alloc_txwin(coproc);
|
|
if (IS_ERR(txwin))
|
|
return PTR_ERR(txwin);
|
|
|
|
per_cpu(cpu_txwin, i) = txwin;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!per_cpu(cpu_txwin, i)) {
|
|
/* shouldn't happen, Each chip will have NX engine */
|
|
pr_err("NX engine is not available for CPU %d\n", i);
|
|
return -EINVAL;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Read the RxFIFO description from device-tree node @dn, open a VAS
 * receive window for it on @vasid and add a new coprocessor entry for
 * @chip_id to the nx842_coprocs list.
 *
 * On success *@ct is set to the value of the "pid" property.
 *
 * Returns 0 on success, the of_property_read_*() error for a missing
 * property, -ENOMEM, -EINVAL for an unknown priority, or the error from
 * vas_rx_win_open().
 */
static int __init vas_cfg_coproc_info(struct device_node *dn, int chip_id,
					int vasid, int *ct)
{
	struct vas_rx_win_attr rxattr;
	struct nx842_coproc *coproc;
	struct vas_window *rxwin;
	const char *priority;
	u32 lpid, pid, tid, fifo_size;
	u64 rx_fifo;
	int ret;

	/* collect every required property before allocating anything */
	ret = of_property_read_u64(dn, "rx-fifo-address", &rx_fifo);
	if (ret) {
		pr_err("Missing rx-fifo-address property\n");
		return ret;
	}

	ret = of_property_read_u32(dn, "rx-fifo-size", &fifo_size);
	if (ret) {
		pr_err("Missing rx-fifo-size property\n");
		return ret;
	}

	ret = of_property_read_u32(dn, "lpid", &lpid);
	if (ret) {
		pr_err("Missing lpid property\n");
		return ret;
	}

	ret = of_property_read_u32(dn, "pid", &pid);
	if (ret) {
		pr_err("Missing pid property\n");
		return ret;
	}

	ret = of_property_read_u32(dn, "tid", &tid);
	if (ret) {
		pr_err("Missing tid property\n");
		return ret;
	}

	ret = of_property_read_string(dn, "priority", &priority);
	if (ret) {
		pr_err("Missing priority property\n");
		return ret;
	}

	coproc = kzalloc(sizeof(*coproc), GFP_KERNEL);
	if (!coproc)
		return -ENOMEM;

	/* the priority string selects the coprocessor type */
	if (strcmp(priority, "High") == 0) {
		coproc->ct = VAS_COP_TYPE_842_HIPRI;
	} else if (strcmp(priority, "Normal") == 0) {
		coproc->ct = VAS_COP_TYPE_842;
	} else {
		pr_err("Invalid RxFIFO priority value\n");
		ret = -EINVAL;
		goto err_out;
	}

	vas_init_rx_win_attr(&rxattr, coproc->ct);
	rxattr.rx_fifo = (void *)rx_fifo;
	rxattr.rx_fifo_size = fifo_size;
	rxattr.lnotify_lpid = lpid;
	rxattr.lnotify_pid = pid;
	rxattr.lnotify_tid = tid;
	/*
	 * Maximum RX window credits can not be more than #CRBs in
	 * RxFIFO. Otherwise, can get checkstop if RxFIFO overruns.
	 */
	rxattr.wcreds_max = fifo_size / CRB_SIZE;

	/*
	 * Open a VAS receive window which is used to configure RxFIFO
	 * for NX.
	 */
	rxwin = vas_rx_win_open(vasid, coproc->ct, &rxattr);
	if (IS_ERR(rxwin)) {
		ret = PTR_ERR(rxwin);
		pr_err("setting RxFIFO with VAS failed: %d\n",
		       ret);
		goto err_out;
	}

	coproc->vas.rxwin = rxwin;
	coproc->vas.id = vasid;
	nx842_add_coprocs_list(coproc, chip_id);

	/*
	 * (lpid, pid, tid) combination has to be unique for each
	 * coprocessor instance in the system. So to make it
	 * unique, skiboot uses coprocessor type such as 842 or
	 * GZIP for pid and provides this value to kernel in pid
	 * device-tree property.
	 */
	*ct = pid;

	return 0;

err_out:
	kfree(coproc);
	return ret;
}
|
|
|
|
|
|
/*
 * Probe one NX node @pn for the VAS path: configure a coprocessor entry
 * for every child compatible with "ibm,p9-nx-842", then ask firmware to
 * initialize the NX instance when the OPAL call is available.
 *
 * Returns 0 on success, -EINVAL when the chip id, the VAS id or the
 * FIFO child nodes are missing, the error from vas_cfg_coproc_info(),
 * or the translated OPAL error.
 */
static int __init nx842_powernv_probe_vas(struct device_node *pn)
{
	struct device_node *dn;
	int chip_id, vasid, ret = 0;
	int nx_fifo_found = 0;
	int uninitialized_var(ct);

	chip_id = of_get_ibm_chip_id(pn);
	if (chip_id < 0) {
		pr_err("ibm,chip-id missing\n");
		return -EINVAL;
	}

	vasid = chip_to_vas_id(chip_id);
	if (vasid < 0) {
		pr_err("Unable to map chip_id %d to vasid\n", chip_id);
		return -EINVAL;
	}

	for_each_child_of_node(pn, dn) {
		if (!of_device_is_compatible(dn, "ibm,p9-nx-842"))
			continue;

		ret = vas_cfg_coproc_info(dn, chip_id, vasid, &ct);
		if (ret) {
			/* leaving the iterator early: drop its reference */
			of_node_put(dn);
			return ret;
		}
		nx_fifo_found++;
	}

	if (!nx_fifo_found) {
		pr_err("NX842 FIFO nodes are missing\n");
		return -EINVAL;
	}

	if (!opal_check_token(OPAL_NX_COPROC_INIT)) {
		pr_warn("Firmware doesn't support NX initialization\n");
		return ret;
	}

	/*
	 * Initialize NX instance for both high and normal priority FIFOs.
	 */
	ret = opal_nx_coproc_init(chip_id, ct);
	if (ret) {
		pr_err("Failed to initialize NX for chip(%d): %d\n",
		       chip_id, ret);
		ret = opal_error_code(ret);
	}

	return ret;
}
|
|
|
|
/*
 * Probe one NX node @dn for the icswx path: read the chip id and the
 * coprocessor type/instance properties, add a coprocessor entry to the
 * nx842_coprocs list and remember the first coprocessor type seen in
 * nx842_ct.
 *
 * Returns 0 on success, -EINVAL if a required property is missing, or
 * -ENOMEM.
 */
static int __init nx842_powernv_probe(struct device_node *dn)
{
	struct nx842_coproc *coproc;
	unsigned int ct, ci;
	int chip_id;

	chip_id = of_get_ibm_chip_id(dn);
	if (chip_id < 0) {
		pr_err("ibm,chip-id missing\n");
		return -EINVAL;
	}

	if (of_property_read_u32(dn, "ibm,842-coprocessor-type", &ct)) {
		pr_err("ibm,842-coprocessor-type missing\n");
		return -EINVAL;
	}

	if (of_property_read_u32(dn, "ibm,842-coprocessor-instance", &ci)) {
		pr_err("ibm,842-coprocessor-instance missing\n");
		return -EINVAL;
	}

	/*
	 * Zero the entry: nx842_delete_coprocs() tests coproc->vas.rxwin
	 * and closes the window when it is set, so the field must not
	 * hold stale heap contents on this path, which never opens one.
	 */
	coproc = kzalloc(sizeof(*coproc), GFP_KERNEL);
	if (!coproc)
		return -ENOMEM;

	coproc->ct = ct;
	coproc->ci = ci;
	nx842_add_coprocs_list(coproc, chip_id);

	pr_info("coprocessor found on chip %d, CT %d CI %d\n", chip_id, ct, ci);

	/* the first type found is used for all requests; only log a mismatch */
	if (!nx842_ct)
		nx842_ct = ct;
	else if (nx842_ct != ct)
		pr_err("NX842 chip %d, CT %d != first found CT %d\n",
		       chip_id, ct, nx842_ct);

	return 0;
}
|
|
|
|
static void nx842_delete_coprocs(void)
|
|
{
|
|
struct nx842_coproc *coproc, *n;
|
|
struct vas_window *txwin;
|
|
int i;
|
|
|
|
/*
|
|
* close percpu txwins that are opened for the corresponding coproc.
|
|
*/
|
|
for_each_possible_cpu(i) {
|
|
txwin = per_cpu(cpu_txwin, i);
|
|
if (txwin)
|
|
vas_win_close(txwin);
|
|
|
|
per_cpu(cpu_txwin, i) = 0;
|
|
}
|
|
|
|
list_for_each_entry_safe(coproc, n, &nx842_coprocs, list) {
|
|
if (coproc->vas.rxwin)
|
|
vas_win_close(coproc->vas.rxwin);
|
|
|
|
list_del(&coproc->list);
|
|
kfree(coproc);
|
|
}
|
|
}
|
|
|
|
static struct nx842_constraints nx842_powernv_constraints = {
|
|
.alignment = DDE_BUFFER_ALIGN,
|
|
.multiple = DDE_BUFFER_LAST_MULT,
|
|
.minimum = DDE_BUFFER_LAST_MULT,
|
|
.maximum = (DDL_LEN_MAX - 1) * PAGE_SIZE,
|
|
};
|
|
|
|
static struct nx842_driver nx842_powernv_driver = {
|
|
.name = KBUILD_MODNAME,
|
|
.owner = THIS_MODULE,
|
|
.workmem_size = sizeof(struct nx842_workmem),
|
|
.constraints = &nx842_powernv_constraints,
|
|
.compress = nx842_powernv_compress,
|
|
.decompress = nx842_powernv_decompress,
|
|
};
|
|
|
|
static int nx842_powernv_crypto_init(struct crypto_tfm *tfm)
|
|
{
|
|
return nx842_crypto_init(tfm, &nx842_powernv_driver);
|
|
}
|
|
|
|
static struct crypto_alg nx842_powernv_alg = {
|
|
.cra_name = "842",
|
|
.cra_driver_name = "842-nx",
|
|
.cra_priority = 300,
|
|
.cra_flags = CRYPTO_ALG_TYPE_COMPRESS,
|
|
.cra_ctxsize = sizeof(struct nx842_crypto_ctx),
|
|
.cra_module = THIS_MODULE,
|
|
.cra_init = nx842_powernv_crypto_init,
|
|
.cra_exit = nx842_crypto_exit,
|
|
.cra_u = { .compress = {
|
|
.coa_compress = nx842_crypto_compress,
|
|
.coa_decompress = nx842_crypto_decompress } }
|
|
};
|
|
|
|
static __init int nx842_powernv_init(void)
|
|
{
|
|
struct device_node *dn;
|
|
int ret;
|
|
|
|
/* verify workmem size/align restrictions */
|
|
BUILD_BUG_ON(WORKMEM_ALIGN % CRB_ALIGN);
|
|
BUILD_BUG_ON(CRB_ALIGN % DDE_ALIGN);
|
|
BUILD_BUG_ON(CRB_SIZE % DDE_ALIGN);
|
|
/* verify buffer size/align restrictions */
|
|
BUILD_BUG_ON(PAGE_SIZE % DDE_BUFFER_ALIGN);
|
|
BUILD_BUG_ON(DDE_BUFFER_ALIGN % DDE_BUFFER_SIZE_MULT);
|
|
BUILD_BUG_ON(DDE_BUFFER_SIZE_MULT % DDE_BUFFER_LAST_MULT);
|
|
|
|
for_each_compatible_node(dn, NULL, "ibm,power9-nx") {
|
|
ret = nx842_powernv_probe_vas(dn);
|
|
if (ret) {
|
|
nx842_delete_coprocs();
|
|
of_node_put(dn);
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
if (list_empty(&nx842_coprocs)) {
|
|
for_each_compatible_node(dn, NULL, "ibm,power-nx")
|
|
nx842_powernv_probe(dn);
|
|
|
|
if (!nx842_ct)
|
|
return -ENODEV;
|
|
|
|
nx842_powernv_exec = nx842_exec_icswx;
|
|
} else {
|
|
ret = nx842_open_percpu_txwins();
|
|
if (ret) {
|
|
nx842_delete_coprocs();
|
|
return ret;
|
|
}
|
|
|
|
nx842_powernv_exec = nx842_exec_vas;
|
|
}
|
|
|
|
ret = crypto_register_alg(&nx842_powernv_alg);
|
|
if (ret) {
|
|
nx842_delete_coprocs();
|
|
return ret;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
module_init(nx842_powernv_init);
|
|
|
|
/*
 * Module exit: unregister the algorithm first, then release the windows
 * and coprocessor entries.
 */
static void __exit nx842_powernv_exit(void)
{
	crypto_unregister_alg(&nx842_powernv_alg);
	nx842_delete_coprocs();
}
module_exit(nx842_powernv_exit);
|