NFSD 6.5 Release Notes


Merge tag 'nfsd-6.5' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux

Pull nfsd updates from Chuck Lever:

 - Clean-ups in the READ path in anticipation of MSG_SPLICE_PAGES

 - Better NUMA awareness when allocating pages and other objects

 - A number of minor clean-ups to XDR encoding

 - Elimination of a race when accepting a TCP socket

 - Numerous observability enhancements

* tag 'nfsd-6.5' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux: (46 commits)
  nfsd: remove redundant assignments to variable len
  svcrdma: Fix stale comment
  NFSD: Distinguish per-net namespace initialization
  nfsd: move init of percpu reply_cache_stats counters back to nfsd_init_net
  SUNRPC: Address RCU warning in net/sunrpc/svc.c
  SUNRPC: Use sysfs_emit in place of strlcpy/sprintf
  SUNRPC: Remove transport class dprintk call sites
  SUNRPC: Fix comments for transport class registration
  svcrdma: Remove an unused argument from __svc_rdma_put_rw_ctxt()
  svcrdma: trace cc_release calls
  svcrdma: Convert "might sleep" comment into a code annotation
  NFSD: Add an nfsd4_encode_nfstime4() helper
  SUNRPC: Move initialization of rq_stime
  SUNRPC: Optimize page release in svc_rdma_sendto()
  svcrdma: Prevent page release when nothing was received
  svcrdma: Revert 2a1e4f21d8 ("svcrdma: Normalize Send page handling")
  SUNRPC: Revert 579900670a ("svcrdma: Remove unused sc_pages field")
  SUNRPC: Revert cc93ce9529 ("svcrdma: Retain the page backing rq_res.head[0].iov_base")
  NFSD: add encoding of op_recall flag for write delegation
  NFSD: Add "official" reviewers for this subsystem
  ...
Merged by Linus Torvalds on 2023-06-26 10:48:57 -07:00 (commit f7976a6493)
31 changed files with 802 additions and 435 deletions


@@ -183,6 +183,8 @@ Henrik Rydberg <rydberg@bitmath.org>
 Herbert Xu <herbert@gondor.apana.org.au>
 Huacai Chen <chenhuacai@kernel.org> <chenhc@lemote.com>
 Huacai Chen <chenhuacai@kernel.org> <chenhuacai@loongson.cn>
+J. Bruce Fields <bfields@fieldses.org> <bfields@redhat.com>
+J. Bruce Fields <bfields@fieldses.org> <bfields@citi.umich.edu>
 Jacob Shin <Jacob.Shin@amd.com>
 Jaegeuk Kim <jaegeuk@kernel.org> <jaegeuk@google.com>
 Jaegeuk Kim <jaegeuk@kernel.org> <jaegeuk.kim@samsung.com>


@@ -11275,6 +11275,10 @@ W: http://kernelnewbies.org/KernelJanitors
 KERNEL NFSD, SUNRPC, AND LOCKD SERVERS
 M: Chuck Lever <chuck.lever@oracle.com>
 M: Jeff Layton <jlayton@kernel.org>
+R: Neil Brown <neilb@suse.de>
+R: Olga Kornievskaia <kolga@netapp.com>
+R: Dai Ngo <Dai.Ngo@oracle.com>
+R: Tom Talpey <tom@talpey.com>
 L: linux-nfs@vger.kernel.org
 S: Supported
 W: http://nfs.sourceforge.net/


@@ -355,7 +355,6 @@ static int lockd_get(void)
     int error;

     if (nlmsvc_serv) {
-        svc_get(nlmsvc_serv);
         nlmsvc_users++;
         return 0;
     }


@@ -80,6 +80,8 @@ enum {
 int nfsd_drc_slab_create(void);
 void nfsd_drc_slab_free(void);
+int nfsd_net_reply_cache_init(struct nfsd_net *nn);
+void nfsd_net_reply_cache_destroy(struct nfsd_net *nn);
 int nfsd_reply_cache_init(struct nfsd_net *);
 void nfsd_reply_cache_shutdown(struct nfsd_net *);
 int nfsd_cache_lookup(struct svc_rqst *);


@@ -97,7 +97,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
         goto out;

     err = -EINVAL;
-    if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
+    if (qword_get(&mesg, buf, PAGE_SIZE) <= 0)
         goto out;

     err = -ENOENT;
@@ -107,7 +107,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
     dprintk("found domain %s\n", buf);

     err = -EINVAL;
-    if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
+    if (qword_get(&mesg, buf, PAGE_SIZE) <= 0)
         goto out;
     fsidtype = simple_strtoul(buf, &ep, 10);
     if (*ep)
@@ -593,7 +593,6 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
 {
     /* client path expiry [flags anonuid anongid fsid] */
     char *buf;
-    int len;
     int err;
     struct auth_domain *dom = NULL;
     struct svc_export exp = {}, *expp;
@@ -609,8 +608,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
     /* client */
     err = -EINVAL;
-    len = qword_get(&mesg, buf, PAGE_SIZE);
-    if (len <= 0)
+    if (qword_get(&mesg, buf, PAGE_SIZE) <= 0)
         goto out;

     err = -ENOENT;
@@ -620,7 +618,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
     /* path */
     err = -EINVAL;
-    if ((len = qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
+    if (qword_get(&mesg, buf, PAGE_SIZE) <= 0)
         goto out1;

     err = kern_path(buf, 0, &exp.ex_path);
@@ -665,7 +663,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
         goto out3;
     exp.ex_fsid = an_int;

-    while ((len = qword_get(&mesg, buf, PAGE_SIZE)) > 0) {
+    while (qword_get(&mesg, buf, PAGE_SIZE) > 0) {
         if (strcmp(buf, "fsloc") == 0)
             err = fsloc_parse(&mesg, buf, &exp.ex_fslocs);
         else if (strcmp(buf, "uuid") == 0)


@@ -151,8 +151,6 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
 {
     struct nfsd3_readargs *argp = rqstp->rq_argp;
     struct nfsd3_readres *resp = rqstp->rq_resp;
-    unsigned int len;
-    int v;

     dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n",
         SVCFH_fmt(&argp->fh),
@@ -166,17 +164,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
     if (argp->offset + argp->count > (u64)OFFSET_MAX)
         argp->count = (u64)OFFSET_MAX - argp->offset;

-    v = 0;
-    len = argp->count;
     resp->pages = rqstp->rq_next_page;
-    while (len > 0) {
-        struct page *page = *(rqstp->rq_next_page++);
-
-        rqstp->rq_vec[v].iov_base = page_address(page);
-        rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE);
-        len -= rqstp->rq_vec[v].iov_len;
-        v++;
-    }

     /* Obtain buffer pointer for payload.
      * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof)
@@ -187,7 +175,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
     fh_copy(&resp->fh, &argp->fh);
     resp->status = nfsd_read(rqstp, &resp->fh, argp->offset,
-                 rqstp->rq_vec, v, &resp->count, &resp->eof);
+                 &resp->count, &resp->eof);
     return rpc_success;
 }


@@ -828,7 +828,8 @@ nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
             return false;
         if (xdr_stream_encode_u32(xdr, resp->len) < 0)
             return false;
-        xdr_write_pages(xdr, resp->pages, 0, resp->len);
+        svcxdr_encode_opaque_pages(rqstp, xdr, resp->pages, 0,
+                       resp->len);
         if (svc_encode_result_payload(rqstp, head->iov_len, resp->len) < 0)
             return false;
         break;
@@ -859,8 +860,9 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
             return false;
         if (xdr_stream_encode_u32(xdr, resp->count) < 0)
             return false;
-        xdr_write_pages(xdr, resp->pages, rqstp->rq_res.page_base,
-                resp->count);
+        svcxdr_encode_opaque_pages(rqstp, xdr, resp->pages,
+                       rqstp->rq_res.page_base,
+                       resp->count);
         if (svc_encode_result_payload(rqstp, head->iov_len, resp->count) < 0)
             return false;
         break;
@@ -961,7 +963,8 @@ nfs3svc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
             return false;
         if (!svcxdr_encode_cookieverf3(xdr, resp->verf))
             return false;
-        xdr_write_pages(xdr, dirlist->pages, 0, dirlist->len);
+        svcxdr_encode_opaque_pages(rqstp, xdr, dirlist->pages, 0,
+                       dirlist->len);
         /* no more entries */
         if (xdr_stream_encode_item_absent(xdr) < 0)
             return false;
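All three conversions above replace a bare xdr_write_pages() call with svcxdr_encode_opaque_pages(), which also takes the svc_rqst so the stream's page bookkeeping stays in step with the request. For readers unfamiliar with the underlying wire rule, here is a minimal user-space sketch (not kernel code) of the XDR convention these helpers honor: variable-length opaque data is zero-padded out to the next 4-byte (XDR_UNIT) boundary. xdr_append_opaque() is a hypothetical helper invented for this example.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define XDR_UNIT 4

/* Append @len opaque bytes at @out and zero-pad to the next XDR_UNIT
 * boundary. Returns the number of bytes consumed, including the pad.
 */
static size_t xdr_append_opaque(uint8_t *out, const void *data, size_t len)
{
    size_t pad = (XDR_UNIT - (len & (XDR_UNIT - 1))) & (XDR_UNIT - 1);

    memcpy(out, data, len);
    memset(out + len, 0, pad);
    return len + pad;
}

int main(void)
{
    uint8_t buf[16];

    /* A 5-byte payload occupies 8 bytes on the wire: 5 data + 3 pad. */
    printf("wire length: %zu\n", xdr_append_opaque(buf, "hello", 5));
    return 0;
}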


@@ -2541,6 +2541,20 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode,
     return p;
 }

+static __be32 nfsd4_encode_nfstime4(struct xdr_stream *xdr,
+                    struct timespec64 *tv)
+{
+    __be32 *p;
+
+    p = xdr_reserve_space(xdr, XDR_UNIT * 3);
+    if (!p)
+        return nfserr_resource;
+
+    p = xdr_encode_hyper(p, (s64)tv->tv_sec);
+    *p = cpu_to_be32(tv->tv_nsec);
+    return nfs_ok;
+}
+
 /*
  * ctime (in NFSv4, time_metadata) is not writeable, and the client
  * doesn't really care what resolution could theoretically be stored by
@@ -2566,12 +2580,16 @@ static __be32 *encode_time_delta(__be32 *p, struct inode *inode)
     return p;
 }

-static __be32 *encode_cinfo(__be32 *p, struct nfsd4_change_info *c)
+static __be32
+nfsd4_encode_change_info4(struct xdr_stream *xdr, struct nfsd4_change_info *c)
 {
-    *p++ = cpu_to_be32(c->atomic);
-    p = xdr_encode_hyper(p, c->before_change);
-    p = xdr_encode_hyper(p, c->after_change);
-    return p;
+    if (xdr_stream_encode_bool(xdr, c->atomic) < 0)
+        return nfserr_resource;
+    if (xdr_stream_encode_u64(xdr, c->before_change) < 0)
+        return nfserr_resource;
+    if (xdr_stream_encode_u64(xdr, c->after_change) < 0)
+        return nfserr_resource;
+    return nfs_ok;
 }

 /* Encode as an array of strings the string given with components
@@ -3348,11 +3366,9 @@ out_acl:
         p = xdr_encode_hyper(p, dummy64);
     }
     if (bmval1 & FATTR4_WORD1_TIME_ACCESS) {
-        p = xdr_reserve_space(xdr, 12);
-        if (!p)
-            goto out_resource;
-        p = xdr_encode_hyper(p, (s64)stat.atime.tv_sec);
-        *p++ = cpu_to_be32(stat.atime.tv_nsec);
+        status = nfsd4_encode_nfstime4(xdr, &stat.atime);
+        if (status)
+            goto out;
     }
     if (bmval1 & FATTR4_WORD1_TIME_DELTA) {
         p = xdr_reserve_space(xdr, 12);
@@ -3361,25 +3377,19 @@ out_acl:
         p = encode_time_delta(p, d_inode(dentry));
     }
     if (bmval1 & FATTR4_WORD1_TIME_METADATA) {
-        p = xdr_reserve_space(xdr, 12);
-        if (!p)
-            goto out_resource;
-        p = xdr_encode_hyper(p, (s64)stat.ctime.tv_sec);
-        *p++ = cpu_to_be32(stat.ctime.tv_nsec);
+        status = nfsd4_encode_nfstime4(xdr, &stat.ctime);
+        if (status)
+            goto out;
     }
     if (bmval1 & FATTR4_WORD1_TIME_MODIFY) {
-        p = xdr_reserve_space(xdr, 12);
-        if (!p)
-            goto out_resource;
-        p = xdr_encode_hyper(p, (s64)stat.mtime.tv_sec);
-        *p++ = cpu_to_be32(stat.mtime.tv_nsec);
+        status = nfsd4_encode_nfstime4(xdr, &stat.mtime);
+        if (status)
+            goto out;
     }
     if (bmval1 & FATTR4_WORD1_TIME_CREATE) {
-        p = xdr_reserve_space(xdr, 12);
-        if (!p)
-            goto out_resource;
-        p = xdr_encode_hyper(p, (s64)stat.btime.tv_sec);
-        *p++ = cpu_to_be32(stat.btime.tv_nsec);
+        status = nfsd4_encode_nfstime4(xdr, &stat.btime);
+        if (status)
+            goto out;
     }
     if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) {
         u64 ino = stat.ino;
@@ -3688,6 +3698,30 @@ fail:
     return -EINVAL;
 }

+static __be32
+nfsd4_encode_verifier4(struct xdr_stream *xdr, const nfs4_verifier *verf)
+{
+    __be32 *p;
+
+    p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE);
+    if (!p)
+        return nfserr_resource;
+    memcpy(p, verf->data, sizeof(verf->data));
+    return nfs_ok;
+}
+
+static __be32
+nfsd4_encode_clientid4(struct xdr_stream *xdr, const clientid_t *clientid)
+{
+    __be32 *p;
+
+    p = xdr_reserve_space(xdr, sizeof(__be64));
+    if (!p)
+        return nfserr_resource;
+    memcpy(p, clientid, sizeof(*clientid));
+    return nfs_ok;
+}
+
 static __be32
 nfsd4_encode_stateid(struct xdr_stream *xdr, stateid_t *sid)
 {
@@ -3752,15 +3786,8 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr,
             union nfsd4_op_u *u)
 {
     struct nfsd4_commit *commit = &u->commit;
-    struct xdr_stream *xdr = resp->xdr;
-    __be32 *p;

-    p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE);
-    if (!p)
-        return nfserr_resource;
-    p = xdr_encode_opaque_fixed(p, commit->co_verf.data,
-                    NFS4_VERIFIER_SIZE);
-    return 0;
+    return nfsd4_encode_verifier4(resp->xdr, &commit->co_verf);
 }

@@ -3769,12 +3796,10 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr,
 {
     struct nfsd4_create *create = &u->create;
     struct xdr_stream *xdr = resp->xdr;
-    __be32 *p;

-    p = xdr_reserve_space(xdr, 20);
-    if (!p)
-        return nfserr_resource;
-    encode_cinfo(p, &create->cr_cinfo);
+    nfserr = nfsd4_encode_change_info4(xdr, &create->cr_cinfo);
+    if (nfserr)
+        return nfserr;
     return nfsd4_encode_bitmap(xdr, create->cr_bmval[0],
                    create->cr_bmval[1], create->cr_bmval[2]);
 }
@@ -3892,13 +3917,8 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr,
 {
     struct nfsd4_link *link = &u->link;
     struct xdr_stream *xdr = resp->xdr;
-    __be32 *p;

-    p = xdr_reserve_space(xdr, 20);
-    if (!p)
-        return nfserr_resource;
-    p = encode_cinfo(p, &link->li_cinfo);
-    return 0;
+    return nfsd4_encode_change_info4(xdr, &link->li_cinfo);
 }

@@ -3913,11 +3933,11 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr,
     nfserr = nfsd4_encode_stateid(xdr, &open->op_stateid);
     if (nfserr)
         return nfserr;
-    p = xdr_reserve_space(xdr, 24);
-    if (!p)
+    nfserr = nfsd4_encode_change_info4(xdr, &open->op_cinfo);
+    if (nfserr)
+        return nfserr;
+    if (xdr_stream_encode_u32(xdr, open->op_rflags) < 0)
         return nfserr_resource;
-    p = encode_cinfo(p, &open->op_cinfo);
-    *p++ = cpu_to_be32(open->op_rflags);

     nfserr = nfsd4_encode_bitmap(xdr, open->op_bmval[0], open->op_bmval[1],
                      open->op_bmval[2]);
@@ -3956,7 +3976,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr,
         p = xdr_reserve_space(xdr, 32);
         if (!p)
             return nfserr_resource;
-        *p++ = cpu_to_be32(0);
+        *p++ = cpu_to_be32(open->op_recall);

         /*
          * TODO: space_limit's in delegations
@@ -4018,6 +4038,11 @@ nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr,
     return nfsd4_encode_stateid(xdr, &od->od_stateid);
 }

+/*
+ * The operation of this function assumes that this is the only
+ * READ operation in the COMPOUND. If there are multiple READs,
+ * we use nfsd4_encode_readv().
+ */
 static __be32 nfsd4_encode_splice_read(
                 struct nfsd4_compoundres *resp,
                 struct nfsd4_read *read,
@@ -4028,8 +4053,12 @@ static __be32 nfsd4_encode_splice_read(
     int status, space_left;
     __be32 nfserr;

-    /* Make sure there will be room for padding if needed */
-    if (xdr->end - xdr->p < 1)
+    /*
+     * Make sure there is room at the end of buf->head for
+     * svcxdr_encode_opaque_pages() to create a tail buffer
+     * to XDR-pad the payload.
+     */
+    if (xdr->iov != xdr->buf->head || xdr->end - xdr->p < 1)
         return nfserr_resource;

     nfserr = nfsd_splice_read(read->rd_rqstp, read->rd_fhp,
@@ -4038,6 +4067,8 @@ static __be32 nfsd4_encode_splice_read(
     read->rd_length = maxcount;
     if (nfserr)
         goto out_err;
+    svcxdr_encode_opaque_pages(read->rd_rqstp, xdr, buf->pages,
+                   buf->page_base, maxcount);
     status = svc_encode_result_payload(read->rd_rqstp,
                        buf->head[0].iov_len, maxcount);
     if (status) {
@@ -4045,31 +4076,19 @@ static __be32 nfsd4_encode_splice_read(
         goto out_err;
     }

-    buf->page_len = maxcount;
-    buf->len += maxcount;
-    xdr->page_ptr += (buf->page_base + maxcount + PAGE_SIZE - 1)
-                            / PAGE_SIZE;
-
-    /* Use rest of head for padding and remaining ops: */
-    buf->tail[0].iov_base = xdr->p;
-    buf->tail[0].iov_len = 0;
-    xdr->iov = buf->tail;
-    if (maxcount&3) {
-        int pad = 4 - (maxcount&3);
-
-        *(xdr->p++) = 0;
-
-        buf->tail[0].iov_base += maxcount&3;
-        buf->tail[0].iov_len = pad;
-        buf->len += pad;
-    }
-
+    /*
+     * Prepare to encode subsequent operations.
+     *
+     * xdr_truncate_encode() is not safe to use after a successful
+     * splice read has been done, so the following stream
+     * manipulations are open-coded.
+     */
     space_left = min_t(int, (void *)xdr->end - (void *)xdr->p,
                 buf->buflen - buf->len);
     buf->buflen = buf->len + space_left;
     xdr->end = (__be32 *)((void *)xdr->end + space_left);

-    return 0;
+    return nfs_ok;

 out_err:
     /*
@@ -4090,13 +4109,13 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
     __be32 zero = xdr_zero;
     __be32 nfserr;

-    read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, maxcount);
-    if (read->rd_vlen < 0)
+    if (xdr_reserve_space_vec(xdr, maxcount) < 0)
         return nfserr_resource;

-    nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
-                resp->rqstp->rq_vec, read->rd_vlen, &maxcount,
-                &read->rd_eof);
+    nfserr = nfsd_iter_read(resp->rqstp, read->rd_fhp, file,
+                read->rd_offset, &maxcount,
+                xdr->buf->page_len & ~PAGE_MASK,
+                &read->rd_eof);
     read->rd_length = maxcount;
     if (nfserr)
         return nfserr;
@@ -4213,15 +4232,9 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr,
     int starting_len = xdr->buf->len;
     __be32 *p;

-    p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE);
-    if (!p)
-        return nfserr_resource;
-
-    /* XXX: Following NFSv3, we ignore the READDIR verifier for now. */
-    *p++ = cpu_to_be32(0);
-    *p++ = cpu_to_be32(0);
-    xdr->buf->head[0].iov_len = (char *)xdr->p -
-                    (char *)xdr->buf->head[0].iov_base;
+    nfserr = nfsd4_encode_verifier4(xdr, &readdir->rd_verf);
+    if (nfserr != nfs_ok)
+        return nfserr;

     /*
      * Number of bytes left for directory entries allowing for the
@@ -4299,13 +4312,8 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr,
 {
     struct nfsd4_remove *remove = &u->remove;
     struct xdr_stream *xdr = resp->xdr;
-    __be32 *p;

-    p = xdr_reserve_space(xdr, 20);
-    if (!p)
-        return nfserr_resource;
-    p = encode_cinfo(p, &remove->rm_cinfo);
-    return 0;
+    return nfsd4_encode_change_info4(xdr, &remove->rm_cinfo);
 }

 static __be32
@@ -4314,14 +4322,11 @@ nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr,
 {
     struct nfsd4_rename *rename = &u->rename;
     struct xdr_stream *xdr = resp->xdr;
-    __be32 *p;

-    p = xdr_reserve_space(xdr, 40);
-    if (!p)
-        return nfserr_resource;
-    p = encode_cinfo(p, &rename->rn_sinfo);
-    p = encode_cinfo(p, &rename->rn_tinfo);
-    return 0;
+    nfserr = nfsd4_encode_change_info4(xdr, &rename->rn_sinfo);
+    if (nfserr)
+        return nfserr;
+    return nfsd4_encode_change_info4(xdr, &rename->rn_tinfo);
 }

 static __be32
@@ -4448,23 +4453,25 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr,
 {
     struct nfsd4_setclientid *scd = &u->setclientid;
     struct xdr_stream *xdr = resp->xdr;
-    __be32 *p;

     if (!nfserr) {
-        p = xdr_reserve_space(xdr, 8 + NFS4_VERIFIER_SIZE);
-        if (!p)
-            return nfserr_resource;
-        p = xdr_encode_opaque_fixed(p, &scd->se_clientid, 8);
-        p = xdr_encode_opaque_fixed(p, &scd->se_confirm,
-                        NFS4_VERIFIER_SIZE);
-    }
-    else if (nfserr == nfserr_clid_inuse) {
-        p = xdr_reserve_space(xdr, 8);
-        if (!p)
-            return nfserr_resource;
-        *p++ = cpu_to_be32(0);
-        *p++ = cpu_to_be32(0);
+        nfserr = nfsd4_encode_clientid4(xdr, &scd->se_clientid);
+        if (nfserr != nfs_ok)
+            goto out;
+        nfserr = nfsd4_encode_verifier4(xdr, &scd->se_confirm);
+    } else if (nfserr == nfserr_clid_inuse) {
+        /* empty network id */
+        if (xdr_stream_encode_u32(xdr, 0) < 0) {
+            nfserr = nfserr_resource;
+            goto out;
+        }
+        /* empty universal address */
+        if (xdr_stream_encode_u32(xdr, 0) < 0) {
+            nfserr = nfserr_resource;
+            goto out;
+        }
     }
+out:
     return nfserr;
 }

@@ -4473,17 +4480,12 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr,
            union nfsd4_op_u *u)
 {
     struct nfsd4_write *write = &u->write;
-    struct xdr_stream *xdr = resp->xdr;
-    __be32 *p;

-    p = xdr_reserve_space(xdr, 16);
-    if (!p)
+    if (xdr_stream_encode_u32(resp->xdr, write->wr_bytes_written) < 0)
         return nfserr_resource;
-    *p++ = cpu_to_be32(write->wr_bytes_written);
-    *p++ = cpu_to_be32(write->wr_how_written);
-    p = xdr_encode_opaque_fixed(p, write->wr_verifier.data,
-                        NFS4_VERIFIER_SIZE);
-    return 0;
+    if (xdr_stream_encode_u32(resp->xdr, write->wr_how_written) < 0)
+        return nfserr_resource;
+    return nfsd4_encode_verifier4(resp->xdr, &write->wr_verifier);
 }

 static __be32
@@ -4505,20 +4507,15 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,
     server_scope = nn->nfsd_name;
     server_scope_sz = strlen(nn->nfsd_name);

-    p = xdr_reserve_space(xdr,
-        8 /* eir_clientid */ +
-        4 /* eir_sequenceid */ +
-        4 /* eir_flags */ +
-        4 /* spr_how */);
-    if (!p)
+    if (nfsd4_encode_clientid4(xdr, &exid->clientid) != nfs_ok)
+        return nfserr_resource;
+    if (xdr_stream_encode_u32(xdr, exid->seqid) < 0)
+        return nfserr_resource;
+    if (xdr_stream_encode_u32(xdr, exid->flags) < 0)
         return nfserr_resource;
-
-    p = xdr_encode_opaque_fixed(p, &exid->clientid, 8);
-    *p++ = cpu_to_be32(exid->seqid);
-    *p++ = cpu_to_be32(exid->flags);
-    *p++ = cpu_to_be32(exid->spa_how);
+    if (xdr_stream_encode_u32(xdr, exid->spa_how) < 0)
+        return nfserr_resource;

     switch (exid->spa_how) {
     case SP4_NONE:
         break;
@@ -5099,15 +5096,8 @@ nfsd4_encode_setxattr(struct nfsd4_compoundres *resp, __be32 nfserr,
 {
     struct nfsd4_setxattr *setxattr = &u->setxattr;
     struct xdr_stream *xdr = resp->xdr;
-    __be32 *p;

-    p = xdr_reserve_space(xdr, 20);
-    if (!p)
-        return nfserr_resource;
-
-    encode_cinfo(p, &setxattr->setxa_cinfo);
-
-    return 0;
+    return nfsd4_encode_change_info4(xdr, &setxattr->setxa_cinfo);
 }

 /*
@@ -5253,14 +5243,8 @@ nfsd4_encode_removexattr(struct nfsd4_compoundres *resp, __be32 nfserr,
 {
     struct nfsd4_removexattr *removexattr = &u->removexattr;
     struct xdr_stream *xdr = resp->xdr;
-    __be32 *p;

-    p = xdr_reserve_space(xdr, 20);
-    if (!p)
-        return nfserr_resource;
-    p = encode_cinfo(p, &removexattr->rmxa_cinfo);
-    return 0;
+    return nfsd4_encode_change_info4(xdr, &removexattr->rmxa_cinfo);
 }

 typedef __be32(*nfsd4_enc)(struct nfsd4_compoundres *, __be32, union nfsd4_op_u *u);
@@ -5460,6 +5444,12 @@ status:
 release:
     if (opdesc && opdesc->op_release)
         opdesc->op_release(&op->u);
+
+    /*
+     * Account for pages consumed while encoding this operation.
+     * The xdr_stream primitives don't manage rq_next_page.
+     */
+    rqstp->rq_next_page = xdr->page_ptr + 1;
 }

 /*
@@ -5528,9 +5518,6 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
     p = resp->statusp;

     *p++ = resp->cstate.status;
-
-    rqstp->rq_next_page = xdr->page_ptr + 1;
-
     *p++ = htonl(resp->taglen);
     memcpy(p, resp->tag, resp->taglen);
     p += XDR_QUADLEN(resp->taglen);
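The new nfsd4_encode_nfstime4() helper above consolidates the open-coded 12-byte (XDR_UNIT * 3) timestamp encoding: a 64-bit signed seconds field followed by a 32-bit nanoseconds field, both in network byte order. The following is a user-space sketch of that wire layout only; encode_nfstime4() here is an illustrative function, not the kernel helper, and struct timespec stands in for the kernel's timespec64.

#include <endian.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <time.h>

static size_t encode_nfstime4(uint8_t *out, const struct timespec *ts)
{
    uint64_t sec = htobe64((uint64_t)(int64_t)ts->tv_sec);
    uint32_t nsec = htobe32((uint32_t)ts->tv_nsec);

    memcpy(out, &sec, sizeof(sec));
    memcpy(out + sizeof(sec), &nsec, sizeof(nsec));
    return sizeof(sec) + sizeof(nsec);  /* always 12 bytes on the wire */
}

int main(void)
{
    struct timespec ts;
    uint8_t buf[12];

    clock_gettime(CLOCK_REALTIME, &ts);
    printf("encoded %zu bytes\n", encode_nfstime4(buf, &ts));
    return 0;
}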


@@ -148,12 +148,23 @@ void nfsd_drc_slab_free(void)
     kmem_cache_destroy(drc_slab);
 }

-static int nfsd_reply_cache_stats_init(struct nfsd_net *nn)
+/**
+ * nfsd_net_reply_cache_init - per net namespace reply cache set-up
+ * @nn: nfsd_net being initialized
+ *
+ * Returns zero on success; otherwise a negative errno is returned.
+ */
+int nfsd_net_reply_cache_init(struct nfsd_net *nn)
 {
     return nfsd_percpu_counters_init(nn->counter, NFSD_NET_COUNTERS_NUM);
 }

-static void nfsd_reply_cache_stats_destroy(struct nfsd_net *nn)
+/**
+ * nfsd_net_reply_cache_destroy - per net namespace reply cache tear-down
+ * @nn: nfsd_net being freed
+ *
+ */
+void nfsd_net_reply_cache_destroy(struct nfsd_net *nn)
 {
     nfsd_percpu_counters_destroy(nn->counter, NFSD_NET_COUNTERS_NUM);
 }
@@ -169,17 +180,13 @@ int nfsd_reply_cache_init(struct nfsd_net *nn)
     hashsize = nfsd_hashsize(nn->max_drc_entries);
     nn->maskbits = ilog2(hashsize);

-    status = nfsd_reply_cache_stats_init(nn);
-    if (status)
-        goto out_nomem;
-
     nn->nfsd_reply_cache_shrinker.scan_objects = nfsd_reply_cache_scan;
     nn->nfsd_reply_cache_shrinker.count_objects = nfsd_reply_cache_count;
     nn->nfsd_reply_cache_shrinker.seeks = 1;
     status = register_shrinker(&nn->nfsd_reply_cache_shrinker,
                    "nfsd-reply:%s", nn->nfsd_name);
     if (status)
-        goto out_stats_destroy;
+        return status;

     nn->drc_hashtbl = kvzalloc(array_size(hashsize,
                    sizeof(*nn->drc_hashtbl)), GFP_KERNEL);
@@ -195,9 +202,6 @@ int nfsd_reply_cache_init(struct nfsd_net *nn)
     return 0;
 out_shrinker:
     unregister_shrinker(&nn->nfsd_reply_cache_shrinker);
-out_stats_destroy:
-    nfsd_reply_cache_stats_destroy(nn);
-out_nomem:
     printk(KERN_ERR "nfsd: failed to allocate reply cache\n");
     return -ENOMEM;
 }
@@ -217,7 +221,6 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn)
                         rp, nn);
         }
     }
-    nfsd_reply_cache_stats_destroy(nn);

     kvfree(nn->drc_hashtbl);
     nn->drc_hashtbl = NULL;
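The hunks above split the reply-cache lifetime in two: the per-netns statistics counters now live for the whole life of the network namespace (nfsd_net_reply_cache_init/destroy), while the DRC hash table still comes and goes with the running server (nfsd_reply_cache_init/shutdown). A rough user-space sketch of that two-stage lifetime follows; every name is hypothetical and plain calloc()/free() stand in for the kernel allocators.

#include <stdlib.h>

struct ns_stats { long *counters; };   /* lives as long as the namespace */
struct drc      { void *hashtbl; };    /* lives only while nfsd runs */

static int ns_stats_init(struct ns_stats *s)
{
    s->counters = calloc(8, sizeof(*s->counters));
    return s->counters ? 0 : -1;
}

static void ns_stats_destroy(struct ns_stats *s)
{
    free(s->counters);
}

static int drc_init(struct drc *d)
{
    d->hashtbl = calloc(64, 64);
    return d->hashtbl ? 0 : -1;
}

static void drc_shutdown(struct drc *d)
{
    free(d->hashtbl);
}

int main(void)
{
    struct ns_stats stats;
    struct drc drc;

    if (ns_stats_init(&stats))      /* namespace creation */
        return 1;
    if (drc_init(&drc)) {           /* server start-up */
        ns_stats_destroy(&stats);
        return 1;
    }
    drc_shutdown(&drc);             /* server shutdown */
    ns_stats_destroy(&stats);       /* namespace teardown */
    return 0;
}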


@@ -25,6 +25,7 @@
 #include "netns.h"
 #include "pnfs.h"
 #include "filecache.h"
+#include "trace.h"

 /*
  * We have a single directory with several nodes in it.
@@ -109,12 +110,12 @@ static ssize_t nfsctl_transaction_write(struct file *file, const char __user *bu
     if (IS_ERR(data))
         return PTR_ERR(data);

     rv = write_op[ino](file, data, size);
-    if (rv >= 0) {
-        simple_transaction_set(file, rv);
-        rv = size;
-    }
-    return rv;
+    if (rv < 0)
+        return rv;
+
+    simple_transaction_set(file, rv);
+    return size;
 }

 static ssize_t nfsctl_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos)
@@ -230,6 +231,7 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size)
     if (rpc_pton(net, fo_path, size, sap, salen) == 0)
         return -EINVAL;

+    trace_nfsd_ctl_unlock_ip(net, buf);
     return nlmsvc_unlock_all_by_ip(sap);
 }

@@ -263,7 +265,7 @@ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size)
     fo_path = buf;
     if (qword_get(&buf, fo_path, size) < 0)
         return -EINVAL;
-
+    trace_nfsd_ctl_unlock_fs(netns(file), fo_path);
     error = kern_path(fo_path, 0, &path);
     if (error)
         return error;
@@ -324,7 +326,7 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size)
     len = qword_get(&mesg, dname, size);
     if (len <= 0)
         return -EINVAL;
     path = dname+len+1;
     len = qword_get(&mesg, path, size);
     if (len <= 0)
@@ -338,15 +340,17 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size)
         return -EINVAL;
     maxsize = min(maxsize, NFS3_FHSIZE);

-    if (qword_get(&mesg, mesg, size)>0)
+    if (qword_get(&mesg, mesg, size) > 0)
         return -EINVAL;
+    trace_nfsd_ctl_filehandle(netns(file), dname, path, maxsize);

     /* we have all the words, they are in buf.. */
     dom = unix_domain_find(dname);
     if (!dom)
         return -ENOMEM;
     len = exp_rootfh(netns(file), dom, path, &fh, maxsize);
     auth_domain_put(dom);
     if (len)
         return len;
@@ -399,6 +403,7 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size)
             return rv;
         if (newthreads < 0)
             return -EINVAL;
+        trace_nfsd_ctl_threads(net, newthreads);
         rv = nfsd_svc(newthreads, net, file->f_cred);
         if (rv < 0)
             return rv;
@@ -418,8 +423,8 @@
  * OR
  *
  * Input:
  *   buf:  C string containing whitespace-
  *         separated unsigned integer values
  *         representing the number of NFSD
  *         threads to start in each pool
  *   size: non-zero length of C string in @buf
@@ -471,6 +476,7 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
         rv = -EINVAL;
         if (nthreads[i] < 0)
             goto out_free;
+        trace_nfsd_ctl_pool_threads(net, i, nthreads[i]);
     }
     rv = nfsd_set_nrthreads(i, nthreads, net);
     if (rv)
@@ -526,7 +532,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
     char *sep;
     struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id);

-    if (size>0) {
+    if (size > 0) {
         if (nn->nfsd_serv)
             /* Cannot change versions without updating
              * nn->nfsd_serv->sv_xdrsize, and reallocing
@@ -536,6 +542,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
         if (buf[size-1] != '\n')
             return -EINVAL;
         buf[size-1] = 0;
+        trace_nfsd_ctl_version(netns(file), buf);

         vers = mesg;
         len = qword_get(&mesg, vers, size);
@@ -637,11 +644,11 @@ out:
  * OR
  *
  * Input:
  *   buf:  C string containing whitespace-
  *         separated positive or negative
  *         integer values representing NFS
  *         protocol versions to enable ("+n")
  *         or disable ("-n")
  *   size: non-zero length of C string in @buf
  * Output:
  *   On success: status of zero or more protocol versions has
@@ -689,6 +696,7 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred
     err = get_int(&mesg, &fd);
     if (err != 0 || fd < 0)
         return -EINVAL;
+    trace_nfsd_ctl_ports_addfd(net, fd);

     err = nfsd_create_serv(net);
     if (err != 0)
@@ -705,7 +713,7 @@
 }

 /*
- * A transport listener is added by writing it's transport name and
+ * A transport listener is added by writing its transport name and
  * a port number.
  */
 static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cred *cred)
@@ -720,6 +728,7 @@ static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cr
     if (port < 1 || port > USHRT_MAX)
         return -EINVAL;
+    trace_nfsd_ctl_ports_addxprt(net, transport, port);

     err = nfsd_create_serv(net);
     if (err != 0)
@@ -832,9 +841,9 @@ int nfsd_max_blksize;
  * OR
  *
  * Input:
  *   buf:  C string containing an unsigned
  *         integer value representing the new
  *         NFS blksize
  *   size: non-zero length of C string in @buf
  * Output:
  *   On success: passed-in buffer filled with '\n'-terminated C string
@@ -853,6 +862,8 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
         int rv = get_int(&mesg, &bsize);
         if (rv)
             return rv;
+        trace_nfsd_ctl_maxblksize(netns(file), bsize);
+
         /* force bsize into allowed range and
          * required alignment.
          */
@@ -881,9 +892,9 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
  * OR
  *
  * Input:
  *   buf:  C string containing an unsigned
  *         integer value representing the new
  *         number of max connections
  *   size: non-zero length of C string in @buf
  * Output:
  *   On success: passed-in buffer filled with '\n'-terminated C string
@@ -903,6 +914,7 @@ static ssize_t write_maxconn(struct file *file, char *buf, size_t size)
         if (rv)
             return rv;

+        trace_nfsd_ctl_maxconn(netns(file), maxconn);
         nn->max_connections = maxconn;
     }

@@ -913,6 +925,7 @@
 static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size,
                   time64_t *time, struct nfsd_net *nn)
 {
+    struct dentry *dentry = file_dentry(file);
     char *mesg = buf;
     int rv, i;

@@ -922,6 +935,9 @@ static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size,
         rv = get_int(&mesg, &i);
         if (rv)
             return rv;
+        trace_nfsd_ctl_time(netns(file), dentry->d_name.name,
+                    dentry->d_name.len, i);
+
         /*
          * Some sanity checking. We don't have a reason for
          * these particular numbers, but problems with the
@@ -1014,6 +1030,7 @@ static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size,
     len = qword_get(&mesg, recdir, size);
     if (len <= 0)
         return -EINVAL;
+    trace_nfsd_ctl_recoverydir(netns(file), recdir);

     status = nfs4_reset_recoverydir(recdir);
     if (status)
@@ -1065,7 +1082,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
  * OR
  *
  * Input:
  *   buf:  any value
  *   size: non-zero length of C string in @buf
  * Output:
  *   passed-in buffer filled with "Y" or "N" with a newline
@@ -1087,7 +1104,7 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size)
     case '1':
         if (!nn->nfsd_serv)
             return -EBUSY;
-        nfsd4_end_grace(nn);
+        trace_nfsd_end_grace(netns(file));
         break;
     default:
         return -EINVAL;
@@ -1192,8 +1209,8 @@ static int __nfsd_symlink(struct inode *dir, struct dentry *dentry,
  * @content is assumed to be a NUL-terminated string that lives
  * longer than the symlink itself.
  */
-static void nfsd_symlink(struct dentry *parent, const char *name,
-             const char *content)
+static void _nfsd_symlink(struct dentry *parent, const char *name,
+              const char *content)
 {
     struct inode *dir = parent->d_inode;
     struct dentry *dentry;
@@ -1210,8 +1227,8 @@ out:
     inode_unlock(dir);
 }
 #else
-static inline void nfsd_symlink(struct dentry *parent, const char *name,
-                const char *content)
+static inline void _nfsd_symlink(struct dentry *parent, const char *name,
+                 const char *content)
 {
 }

@@ -1389,8 +1406,8 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc)
     ret = simple_fill_super(sb, 0x6e667364, nfsd_files);
     if (ret)
         return ret;
-    nfsd_symlink(sb->s_root, "supported_krb5_enctypes",
-             "/proc/net/rpc/gss_krb5_enctypes");
+    _nfsd_symlink(sb->s_root, "supported_krb5_enctypes",
+              "/proc/net/rpc/gss_krb5_enctypes");
     dentry = nfsd_mkdir(sb->s_root, NULL, "clients");
     if (IS_ERR(dentry))
         return PTR_ERR(dentry);
@@ -1477,7 +1494,17 @@ static int create_proc_exports_entry(void)
 unsigned int nfsd_net_id;

-static __net_init int nfsd_init_net(struct net *net)
+/**
+ * nfsd_net_init - Prepare the nfsd_net portion of a new net namespace
+ * @net: a freshly-created network namespace
+ *
+ * This information stays around as long as the network namespace is
+ * alive whether or not there is an NFSD instance running in the
+ * namespace.
+ *
+ * Returns zero on success, or a negative errno otherwise.
+ */
+static __net_init int nfsd_net_init(struct net *net)
 {
     int retval;
     struct nfsd_net *nn = net_generic(net, nfsd_net_id);
@@ -1488,6 +1515,9 @@ static __net_init int nfsd_init_net(struct net *net)
     retval = nfsd_idmap_init(net);
     if (retval)
         goto out_idmap_error;
+    retval = nfsd_net_reply_cache_init(nn);
+    if (retval)
+        goto out_repcache_error;
     nn->nfsd_versions = NULL;
     nn->nfsd4_minorversions = NULL;
     nfsd4_init_leases_net(nn);
@@ -1496,22 +1526,32 @@ static __net_init int nfsd_init_net(struct net *net)

     return 0;

+out_repcache_error:
+    nfsd_idmap_shutdown(net);
 out_idmap_error:
     nfsd_export_shutdown(net);
 out_export_error:
     return retval;
 }

-static __net_exit void nfsd_exit_net(struct net *net)
+/**
+ * nfsd_net_exit - Release the nfsd_net portion of a net namespace
+ * @net: a network namespace that is about to be destroyed
+ *
+ */
+static __net_exit void nfsd_net_exit(struct net *net)
 {
+    struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+    nfsd_net_reply_cache_destroy(nn);
     nfsd_idmap_shutdown(net);
     nfsd_export_shutdown(net);
-    nfsd_netns_free_versions(net_generic(net, nfsd_net_id));
+    nfsd_netns_free_versions(nn);
 }

 static struct pernet_operations nfsd_net_ops = {
-    .init = nfsd_init_net,
-    .exit = nfsd_exit_net,
+    .init = nfsd_net_init,
+    .exit = nfsd_net_exit,
     .id   = &nfsd_net_id,
     .size = sizeof(struct nfsd_net),
 };
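The trace_nfsd_ctl_* calls added above fire when an administrator writes to the corresponding files in the nfsd filesystem, normally mounted at /proc/fs/nfsd, which is how tools such as rpc.nfsd configure the server. Below is a hedged user-space sketch of the kind of write that would trigger trace_nfsd_ctl_threads(); it assumes the nfsd filesystem is mounted at the usual path and that the caller has sufficient privilege.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
    const char msg[] = "8\n";   /* request 8 nfsd threads */
    int fd = open("/proc/fs/nfsd/threads", O_WRONLY);

    if (fd < 0) {
        perror("open");         /* nfsd filesystem not mounted? */
        return 1;
    }
    if (write(fd, msg, strlen(msg)) < 0)
        perror("write");
    close(fd);
    return 0;
}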


@@ -623,16 +623,9 @@ void fh_fill_pre_attrs(struct svc_fh *fhp)
     inode = d_inode(fhp->fh_dentry);
     err = fh_getattr(fhp, &stat);
-    if (err) {
-        /* Grab the times from inode anyway */
-        stat.mtime = inode->i_mtime;
-        stat.ctime = inode->i_ctime;
-        stat.size = inode->i_size;
-        if (v4 && IS_I_VERSION(inode)) {
-            stat.change_cookie = inode_query_iversion(inode);
-            stat.result_mask |= STATX_CHANGE_COOKIE;
-        }
-    }
+    if (err)
+        return;
+
     if (v4)
         fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode);
@@ -660,15 +653,10 @@ void fh_fill_post_attrs(struct svc_fh *fhp)
         printk("nfsd: inode locked twice during operation.\n");

     err = fh_getattr(fhp, &fhp->fh_post_attr);
-    if (err) {
-        fhp->fh_post_saved = false;
-        fhp->fh_post_attr.ctime = inode->i_ctime;
-        if (v4 && IS_I_VERSION(inode)) {
-            fhp->fh_post_attr.change_cookie = inode_query_iversion(inode);
-            fhp->fh_post_attr.result_mask |= STATX_CHANGE_COOKIE;
-        }
-    } else
-        fhp->fh_post_saved = true;
+    if (err)
+        return;
+
+    fhp->fh_post_saved = true;
     if (v4)
         fhp->fh_post_change =
             nfsd4_change_attribute(&fhp->fh_post_attr, inode);


@@ -176,9 +176,7 @@ nfsd_proc_read(struct svc_rqst *rqstp)
 {
     struct nfsd_readargs *argp = rqstp->rq_argp;
     struct nfsd_readres *resp = rqstp->rq_resp;
-    unsigned int len;
     u32 eof;
-    int v;

     dprintk("nfsd: READ %s %d bytes at %d\n",
         SVCFH_fmt(&argp->fh),
@@ -187,17 +185,7 @@ nfsd_proc_read(struct svc_rqst *rqstp)
     argp->count = min_t(u32, argp->count, NFSSVC_MAXBLKSIZE_V2);
     argp->count = min_t(u32, argp->count, rqstp->rq_res.buflen);

-    v = 0;
-    len = argp->count;
     resp->pages = rqstp->rq_next_page;
-    while (len > 0) {
-        struct page *page = *(rqstp->rq_next_page++);
-
-        rqstp->rq_vec[v].iov_base = page_address(page);
-        rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE);
-        len -= rqstp->rq_vec[v].iov_len;
-        v++;
-    }

     /* Obtain buffer pointer for payload. 19 is 1 word for
      * status, 17 words for fattr, and 1 word for the byte count.
@@ -207,7 +195,7 @@ nfsd_proc_read(struct svc_rqst *rqstp)
     resp->count = argp->count;
     fh_copy(&resp->fh, &argp->fh);
     resp->status = nfsd_read(rqstp, &resp->fh, argp->offset,
-                 rqstp->rq_vec, v, &resp->count, &eof);
+                 &resp->count, &eof);
     if (resp->status == nfs_ok)
         resp->status = fh_getattr(&resp->fh, &resp->stat);
     else if (resp->status == nfserr_jukebox)


@@ -402,6 +402,11 @@ void nfsd_reset_write_verifier(struct nfsd_net *nn)
     write_sequnlock(&nn->writeverf_lock);
 }

+/*
+ * Crank up a set of per-namespace resources for a new NFSD instance,
+ * including lockd, a duplicate reply cache, an open file cache
+ * instance, and a cache of NFSv4 state objects.
+ */
 static int nfsd_startup_net(struct net *net, const struct cred *cred)
 {
     struct nfsd_net *nn = net_generic(net, nfsd_net_id);


@@ -468,7 +468,8 @@ nfssvc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
     case nfs_ok:
         if (xdr_stream_encode_u32(xdr, resp->len) < 0)
             return false;
-        xdr_write_pages(xdr, &resp->page, 0, resp->len);
+        svcxdr_encode_opaque_pages(rqstp, xdr, &resp->page, 0,
+                       resp->len);
         if (svc_encode_result_payload(rqstp, head->iov_len, resp->len) < 0)
             return false;
         break;
@@ -491,8 +492,9 @@ nfssvc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
             return false;
         if (xdr_stream_encode_u32(xdr, resp->count) < 0)
             return false;
-        xdr_write_pages(xdr, resp->pages, rqstp->rq_res.page_base,
-                resp->count);
+        svcxdr_encode_opaque_pages(rqstp, xdr, resp->pages,
+                       rqstp->rq_res.page_base,
+                       resp->count);
         if (svc_encode_result_payload(rqstp, head->iov_len, resp->count) < 0)
             return false;
         break;
@@ -511,7 +513,8 @@ nfssvc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
         return false;
     switch (resp->status) {
     case nfs_ok:
-        xdr_write_pages(xdr, dirlist->pages, 0, dirlist->len);
+        svcxdr_encode_opaque_pages(rqstp, xdr, dirlist->pages, 0,
+                       dirlist->len);
         /* no more entries */
         if (xdr_stream_encode_item_absent(xdr) < 0)
             return false;


@@ -1581,6 +1581,265 @@ TRACE_EVENT(nfsd_cb_recall_any_done,
	)
);

TRACE_EVENT(nfsd_ctl_unlock_ip,
TP_PROTO(
const struct net *net,
const char *address
),
TP_ARGS(net, address),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__string(address, address)
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__assign_str(address, address);
),
TP_printk("address=%s",
__get_str(address)
)
);
TRACE_EVENT(nfsd_ctl_unlock_fs,
TP_PROTO(
const struct net *net,
const char *path
),
TP_ARGS(net, path),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__string(path, path)
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__assign_str(path, path);
),
TP_printk("path=%s",
__get_str(path)
)
);
TRACE_EVENT(nfsd_ctl_filehandle,
TP_PROTO(
const struct net *net,
const char *domain,
const char *path,
int maxsize
),
TP_ARGS(net, domain, path, maxsize),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__field(int, maxsize)
__string(domain, domain)
__string(path, path)
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__entry->maxsize = maxsize;
__assign_str(domain, domain);
__assign_str(path, path);
),
TP_printk("domain=%s path=%s maxsize=%d",
__get_str(domain), __get_str(path), __entry->maxsize
)
);
TRACE_EVENT(nfsd_ctl_threads,
TP_PROTO(
const struct net *net,
int newthreads
),
TP_ARGS(net, newthreads),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__field(int, newthreads)
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__entry->newthreads = newthreads;
),
TP_printk("newthreads=%d",
__entry->newthreads
)
);
TRACE_EVENT(nfsd_ctl_pool_threads,
TP_PROTO(
const struct net *net,
int pool,
int nrthreads
),
TP_ARGS(net, pool, nrthreads),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__field(int, pool)
__field(int, nrthreads)
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__entry->pool = pool;
__entry->nrthreads = nrthreads;
),
TP_printk("pool=%d nrthreads=%d",
__entry->pool, __entry->nrthreads
)
);
TRACE_EVENT(nfsd_ctl_version,
TP_PROTO(
const struct net *net,
const char *mesg
),
TP_ARGS(net, mesg),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__string(mesg, mesg)
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__assign_str(mesg, mesg);
),
TP_printk("%s",
__get_str(mesg)
)
);
TRACE_EVENT(nfsd_ctl_ports_addfd,
TP_PROTO(
const struct net *net,
int fd
),
TP_ARGS(net, fd),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__field(int, fd)
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__entry->fd = fd;
),
TP_printk("fd=%d",
__entry->fd
)
);
TRACE_EVENT(nfsd_ctl_ports_addxprt,
TP_PROTO(
const struct net *net,
const char *transport,
int port
),
TP_ARGS(net, transport, port),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__field(int, port)
__string(transport, transport)
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__entry->port = port;
__assign_str(transport, transport);
),
TP_printk("transport=%s port=%d",
__get_str(transport), __entry->port
)
);
TRACE_EVENT(nfsd_ctl_maxblksize,
TP_PROTO(
const struct net *net,
int bsize
),
TP_ARGS(net, bsize),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__field(int, bsize)
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__entry->bsize = bsize;
),
TP_printk("bsize=%d",
__entry->bsize
)
);
TRACE_EVENT(nfsd_ctl_maxconn,
TP_PROTO(
const struct net *net,
int maxconn
),
TP_ARGS(net, maxconn),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__field(int, maxconn)
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__entry->maxconn = maxconn;
),
TP_printk("maxconn=%d",
__entry->maxconn
)
);
TRACE_EVENT(nfsd_ctl_time,
TP_PROTO(
const struct net *net,
const char *name,
size_t namelen,
int time
),
TP_ARGS(net, name, namelen, time),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__field(int, time)
__string_len(name, name, namelen)
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__entry->time = time;
__assign_str_len(name, name, namelen);
),
TP_printk("file=%s time=%d\n",
__get_str(name), __entry->time
)
);
TRACE_EVENT(nfsd_ctl_recoverydir,
TP_PROTO(
const struct net *net,
const char *recdir
),
TP_ARGS(net, recdir),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__string(recdir, recdir)
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__assign_str(recdir, recdir);
),
TP_printk("recdir=%s",
__get_str(recdir)
)
);
TRACE_EVENT(nfsd_end_grace,
TP_PROTO(
const struct net *net
),
TP_ARGS(net),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
),
TP_printk("nn=%d", __entry->netns_ino
)
);
#endif /* _NFSD_TRACE_H */

#undef TRACE_INCLUDE_PATH


@@ -388,7 +388,9 @@ nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap)
 			iap->ia_mode &= ~S_ISGID;
 		} else {
 			/* set ATTR_KILL_* bits and let VFS handle it */
-			iap->ia_valid |= (ATTR_KILL_SUID | ATTR_KILL_SGID);
+			iap->ia_valid |= ATTR_KILL_SUID;
+			iap->ia_valid |=
+				setattr_should_drop_sgid(&nop_mnt_idmap, inode);
 		}
 	}
 }
@@ -1001,6 +1003,18 @@ static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	}
 }
 
+/**
+ * nfsd_splice_read - Perform a VFS read using a splice pipe
+ * @rqstp: RPC transaction context
+ * @fhp: file handle of file to be read
+ * @file: opened struct file of file to be read
+ * @offset: starting byte offset
+ * @count: IN: requested number of bytes; OUT: number of bytes read
+ * @eof: OUT: set non-zero if operation reached the end of the file
+ *
+ * Returns nfs_ok on success, otherwise an nfserr stat value is
+ * returned.
+ */
 __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
 			struct file *file, loff_t offset, unsigned long *count,
 			u32 *eof)
@@ -1014,22 +1028,50 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	ssize_t host_err;
 
 	trace_nfsd_read_splice(rqstp, fhp, offset, *count);
+	rqstp->rq_next_page = rqstp->rq_respages + 1;
 	host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
 	return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
 }
 
-__be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
-		  struct file *file, loff_t offset,
-		  struct kvec *vec, int vlen, unsigned long *count,
-		  u32 *eof)
+/**
+ * nfsd_iter_read - Perform a VFS read using an iterator
+ * @rqstp: RPC transaction context
+ * @fhp: file handle of file to be read
+ * @file: opened struct file of file to be read
+ * @offset: starting byte offset
+ * @count: IN: requested number of bytes; OUT: number of bytes read
+ * @base: offset in first page of read buffer
+ * @eof: OUT: set non-zero if operation reached the end of the file
+ *
+ * Some filesystems or situations cannot use nfsd_splice_read. This
+ * function is the slightly less-performant fallback for those cases.
+ *
+ * Returns nfs_ok on success, otherwise an nfserr stat value is
+ * returned.
+ */
+__be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
+		      struct file *file, loff_t offset, unsigned long *count,
+		      unsigned int base, u32 *eof)
 {
+	unsigned long v, total;
 	struct iov_iter iter;
 	loff_t ppos = offset;
+	struct page *page;
 	ssize_t host_err;
 
+	v = 0;
+	total = *count;
+	while (total) {
+		page = *(rqstp->rq_next_page++);
+		rqstp->rq_vec[v].iov_base = page_address(page) + base;
+		rqstp->rq_vec[v].iov_len = min_t(size_t, total, PAGE_SIZE - base);
+		total -= rqstp->rq_vec[v].iov_len;
+		++v;
+		base = 0;
+	}
+	WARN_ON_ONCE(v > ARRAY_SIZE(rqstp->rq_vec));
+
 	trace_nfsd_read_vector(rqstp, fhp, offset, *count);
-	iov_iter_kvec(&iter, ITER_DEST, vec, vlen, *count);
+	iov_iter_kvec(&iter, ITER_DEST, rqstp->rq_vec, v, *count);
 	host_err = vfs_iter_read(file, &iter, &ppos, 0);
 	return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
 }
@@ -1159,14 +1201,24 @@ out_nfserr:
 	return nfserr;
 }
 
-/*
- * Read data from a file. count must contain the requested read count
- * on entry. On return, *count contains the number of bytes actually read.
+/**
+ * nfsd_read - Read data from a file
+ * @rqstp: RPC transaction context
+ * @fhp: file handle of file to be read
+ * @offset: starting byte offset
+ * @count: IN: requested number of bytes; OUT: number of bytes read
+ * @eof: OUT: set non-zero if operation reached the end of the file
+ *
+ * The caller must verify that there is enough space in @rqstp.rq_res
+ * to perform this operation.
+ *
  * N.B. After this call fhp needs an fh_put
+ *
+ * Returns nfs_ok on success, otherwise an nfserr stat value is
+ * returned.
  */
 __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
-		 loff_t offset, struct kvec *vec, int vlen, unsigned long *count,
-		 u32 *eof)
+		 loff_t offset, unsigned long *count, u32 *eof)
 {
 	struct nfsd_file *nf;
 	struct file *file;
@@ -1181,12 +1233,10 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &rqstp->rq_flags))
 		err = nfsd_splice_read(rqstp, fhp, file, offset, count, eof);
 	else
-		err = nfsd_readv(rqstp, fhp, file, offset, vec, vlen, count, eof);
+		err = nfsd_iter_read(rqstp, fhp, file, offset, count, 0, eof);
 
 	nfsd_file_put(nf);
 	trace_nfsd_read_done(rqstp, fhp, offset, *count);
 	return err;
 }
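With nfsd_iter_read() assembling rqstp->rq_vec internally, a caller of nfsd_read() no longer prepares a kvec array of its own. The following is a minimal caller sketch, not code from the patch; the function name example_read is hypothetical and only the signatures visible in the hunks above are assumed.

/* Hypothetical caller sketch, assuming the nfsd_read() signature shown above. */
static __be32 example_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
			   loff_t offset, unsigned long *count, u32 *eof)
{
	/* nfsd_read() picks splice or iterator reads itself and consumes
	 * reply pages directly via rqstp->rq_next_page.
	 */
	return nfsd_read(rqstp, fhp, offset, count, eof);
}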


@@ -110,13 +110,12 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
 				struct file *file, loff_t offset,
 				unsigned long *count,
 				u32 *eof);
-__be32		nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
-				struct file *file, loff_t offset,
-				struct kvec *vec, int vlen,
-				unsigned long *count,
+__be32		nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
+				struct file *file, loff_t offset,
+				unsigned long *count, unsigned int base,
 				u32 *eof);
-__be32		nfsd_read(struct svc_rqst *, struct svc_fh *,
-				loff_t, struct kvec *, int, unsigned long *,
+__be32		nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
+				loff_t offset, unsigned long *count,
 				u32 *eof);
 __be32		nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t,
 				struct kvec *, int, unsigned long *,


@@ -508,6 +508,27 @@ static inline void svcxdr_init_encode(struct svc_rqst *rqstp)
 	xdr->rqst = NULL;
 }
 
+/**
+ * svcxdr_encode_opaque_pages - Insert pages into an xdr_stream
+ * @xdr: xdr_stream to be updated
+ * @pages: array of pages to insert
+ * @base: starting offset of first data byte in @pages
+ * @len: number of data bytes in @pages to insert
+ *
+ * After the @pages are added, the tail iovec is instantiated pointing
+ * to end of the head buffer, and the stream is set up to encode
+ * subsequent items into the tail.
+ */
+static inline void svcxdr_encode_opaque_pages(struct svc_rqst *rqstp,
+					      struct xdr_stream *xdr,
+					      struct page **pages,
+					      unsigned int base,
+					      unsigned int len)
+{
+	xdr_write_pages(xdr, pages, base, len);
+	xdr->page_ptr = rqstp->rq_next_page - 1;
+}
+
 /**
  * svcxdr_set_auth_slack -
  * @rqstp: RPC transaction
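A hedged usage sketch for the new helper above. The encoder name and the payload variables (resp_pages, base, len) are illustrative placeholders, not the actual nfsd call sites.

/* Illustrative only: hand payload pages to the reply stream, then keep
 * encoding in the tail iovec that the helper sets up.
 */
static void example_encode_payload(struct svc_rqst *rqstp,
				   struct xdr_stream *xdr,
				   struct page **resp_pages,
				   unsigned int base, unsigned int len)
{
	svcxdr_encode_opaque_pages(rqstp, xdr, resp_pages, base, len);
	/* Items encoded from here on land in rq_res.tail[0]. */
}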


@@ -135,7 +135,6 @@ struct svc_rdma_recv_ctxt {
 	struct ib_sge		rc_recv_sge;
 	void			*rc_recv_buf;
 	struct xdr_stream	rc_stream;
-	bool			rc_temp;
 	u32			rc_byte_len;
 	unsigned int		rc_page_count;
 	u32			rc_inv_rkey;
@@ -155,12 +154,12 @@ struct svc_rdma_send_ctxt {
 	struct ib_send_wr	sc_send_wr;
 	struct ib_cqe		sc_cqe;
-	struct completion	sc_done;
 	struct xdr_buf		sc_hdrbuf;
 	struct xdr_stream	sc_stream;
 	void			*sc_xprt_buf;
+	int			sc_page_count;
 	int			sc_cur_sge_no;
+	struct page		*sc_pages[RPCSVC_MAXPAGES];
 	struct ib_sge		sc_sges[];
 };


@@ -242,8 +242,7 @@ extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf,
 extern void xdr_init_encode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
 			   struct page **pages, struct rpc_rqst *rqst);
 extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
-extern int xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec,
-			  size_t nbytes);
+extern int xdr_reserve_space_vec(struct xdr_stream *xdr, size_t nbytes);
 extern void __xdr_commit_encode(struct xdr_stream *xdr);
 extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len);
 extern void xdr_truncate_decode(struct xdr_stream *xdr, size_t len);


@@ -2112,6 +2112,14 @@ DEFINE_POST_CHUNK_EVENT(read);
 DEFINE_POST_CHUNK_EVENT(write);
 DEFINE_POST_CHUNK_EVENT(reply);
 
+DEFINE_EVENT(svcrdma_post_chunk_class, svcrdma_cc_release,
+	TP_PROTO(
+		const struct rpc_rdma_cid *cid,
+		int sqecount
+	),
+	TP_ARGS(cid, sqecount)
+);
+
 TRACE_EVENT(svcrdma_wc_read,
 	TP_PROTO(
 		const struct ib_wc *wc,


@@ -2104,31 +2104,46 @@ DEFINE_SVC_DEFERRED_EVENT(drop);
 DEFINE_SVC_DEFERRED_EVENT(queue);
 DEFINE_SVC_DEFERRED_EVENT(recv);
 
-TRACE_EVENT(svcsock_new_socket,
+DECLARE_EVENT_CLASS(svcsock_lifetime_class,
 	TP_PROTO(
+		const void *svsk,
 		const struct socket *socket
 	),
-	TP_ARGS(socket),
+	TP_ARGS(svsk, socket),
 	TP_STRUCT__entry(
+		__field(unsigned int, netns_ino)
+		__field(const void *, svsk)
+		__field(const void *, sk)
 		__field(unsigned long, type)
 		__field(unsigned long, family)
-		__field(bool, listener)
+		__field(unsigned long, state)
 	),
 	TP_fast_assign(
-		__entry->type = socket->type;
-		__entry->family = socket->sk->sk_family;
-		__entry->listener = (socket->sk->sk_state == TCP_LISTEN);
-	),
-	TP_printk("type=%s family=%s%s",
-		show_socket_type(__entry->type),
+		struct sock *sk = socket->sk;
+
+		__entry->netns_ino = sock_net(sk)->ns.inum;
+		__entry->svsk = svsk;
+		__entry->sk = sk;
+		__entry->type = socket->type;
+		__entry->family = sk->sk_family;
+		__entry->state = sk->sk_state;
+	),
+	TP_printk("svsk=%p type=%s family=%s%s",
+		__entry->svsk, show_socket_type(__entry->type),
 		rpc_show_address_family(__entry->family),
-		__entry->listener ? " (listener)" : ""
+		__entry->state == TCP_LISTEN ? " (listener)" : ""
 	)
 );
 
+#define DEFINE_SVCSOCK_LIFETIME_EVENT(name) \
+	DEFINE_EVENT(svcsock_lifetime_class, name, \
+		TP_PROTO( \
+			const void *svsk, \
+			const struct socket *socket \
+		), \
+		TP_ARGS(svsk, socket))
+
+DEFINE_SVCSOCK_LIFETIME_EVENT(svcsock_new);
+DEFINE_SVCSOCK_LIFETIME_EVENT(svcsock_free);
+
 TRACE_EVENT(svcsock_marker,
 	TP_PROTO(


@@ -109,15 +109,15 @@ param_get_pool_mode(char *buf, const struct kernel_param *kp)
 	switch (*ip)
 	{
 	case SVC_POOL_AUTO:
-		return strlcpy(buf, "auto\n", 20);
+		return sysfs_emit(buf, "auto\n");
 	case SVC_POOL_GLOBAL:
-		return strlcpy(buf, "global\n", 20);
+		return sysfs_emit(buf, "global\n");
 	case SVC_POOL_PERCPU:
-		return strlcpy(buf, "percpu\n", 20);
+		return sysfs_emit(buf, "percpu\n");
 	case SVC_POOL_PERNODE:
-		return strlcpy(buf, "pernode\n", 20);
+		return sysfs_emit(buf, "pernode\n");
 	default:
-		return sprintf(buf, "%d\n", *ip);
+		return sysfs_emit(buf, "%d\n", *ip);
 	}
 }
@@ -597,34 +597,25 @@ svc_destroy(struct kref *ref)
 }
 EXPORT_SYMBOL_GPL(svc_destroy);
 
-/*
- * Allocate an RPC server's buffer space.
- * We allocate pages and place them in rq_pages.
- */
-static int
+static bool
 svc_init_buffer(struct svc_rqst *rqstp, unsigned int size, int node)
 {
-	unsigned int pages, arghi;
+	unsigned long pages, ret;
 
 	/* bc_xprt uses fore channel allocated buffers */
 	if (svc_is_backchannel(rqstp))
-		return 1;
+		return true;
 
 	pages = size / PAGE_SIZE + 1; /* extra page as we hold both request and reply.
 				       * We assume one is at most one page
 				       */
-	arghi = 0;
 	WARN_ON_ONCE(pages > RPCSVC_MAXPAGES);
 	if (pages > RPCSVC_MAXPAGES)
 		pages = RPCSVC_MAXPAGES;
-	while (pages) {
-		struct page *p = alloc_pages_node(node, GFP_KERNEL, 0);
-		if (!p)
-			break;
-		rqstp->rq_pages[arghi++] = p;
-		pages--;
-	}
-	return pages == 0;
+
+	ret = alloc_pages_bulk_array_node(GFP_KERNEL, node, pages,
+					  rqstp->rq_pages);
+	return ret == pages;
 }
 
 /*
@@ -1173,6 +1164,7 @@ static void __svc_unregister(struct net *net, const u32 program, const u32 versi
  */
 static void svc_unregister(const struct svc_serv *serv, struct net *net)
 {
+	struct sighand_struct *sighand;
 	struct svc_program *progp;
 	unsigned long flags;
 	unsigned int i;
@@ -1189,9 +1181,12 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net)
 		}
 	}
 
-	spin_lock_irqsave(&current->sighand->siglock, flags);
+	rcu_read_lock();
+	sighand = rcu_dereference(current->sighand);
+	spin_lock_irqsave(&sighand->siglock, flags);
 	recalc_sigpending();
-	spin_unlock_irqrestore(&current->sighand->siglock, flags);
+	spin_unlock_irqrestore(&sighand->siglock, flags);
+	rcu_read_unlock();
 }
 
 /*
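The bulk allocator used above returns how many slots of the page array are populated when it returns, so svc_init_buffer() can treat anything short of a full array as failure. A minimal sketch of that pattern follows; the helper name example_fill_page_array is illustrative and not part of the patch.

/* Illustrative helper: succeed only if every slot was filled. */
static bool example_fill_page_array(struct page **pages,
				    unsigned long count, int node)
{
	return alloc_pages_bulk_array_node(GFP_KERNEL, node,
					   count, pages) == count;
}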


@@ -74,13 +74,18 @@ static LIST_HEAD(svc_xprt_class_list);
  * that no other thread will be using the transport or will
  * try to set XPT_DEAD.
  */
+
+/**
+ * svc_reg_xprt_class - Register a server-side RPC transport class
+ * @xcl: New transport class to be registered
+ *
+ * Returns zero on success; otherwise a negative errno is returned.
+ */
 int svc_reg_xprt_class(struct svc_xprt_class *xcl)
 {
 	struct svc_xprt_class *cl;
 	int res = -EEXIST;
 
-	dprintk("svc: Adding svc transport class '%s'\n", xcl->xcl_name);
 	INIT_LIST_HEAD(&xcl->xcl_list);
 	spin_lock(&svc_xprt_class_lock);
 	/* Make sure there isn't already a class with the same name */
@@ -96,9 +101,13 @@ out:
 }
 EXPORT_SYMBOL_GPL(svc_reg_xprt_class);
 
+/**
+ * svc_unreg_xprt_class - Unregister a server-side RPC transport class
+ * @xcl: Transport class to be unregistered
+ *
+ */
 void svc_unreg_xprt_class(struct svc_xprt_class *xcl)
 {
-	dprintk("svc: Removing svc transport class '%s'\n", xcl->xcl_name);
 	spin_lock(&svc_xprt_class_lock);
 	list_del_init(&xcl->xcl_list);
 	spin_unlock(&svc_xprt_class_lock);
@@ -685,8 +694,9 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
 	}
 
 	for (filled = 0; filled < pages; filled = ret) {
-		ret = alloc_pages_bulk_array(GFP_KERNEL, pages,
-					     rqstp->rq_pages);
+		ret = alloc_pages_bulk_array_node(GFP_KERNEL,
+						  rqstp->rq_pool->sp_id,
+						  pages, rqstp->rq_pages);
 		if (ret > filled)
 			/* Made progress, don't sleep yet */
 			continue;
@@ -843,15 +853,11 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
 		svc_xprt_received(xprt);
 	} else if (svc_xprt_reserve_slot(rqstp, xprt)) {
 		/* XPT_DATA|XPT_DEFERRED case: */
-		dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
-			rqstp, rqstp->rq_pool->sp_id, xprt,
-			kref_read(&xprt->xpt_ref));
 		rqstp->rq_deferred = svc_deferred_dequeue(xprt);
 		if (rqstp->rq_deferred)
 			len = svc_deferred_recv(rqstp);
 		else
 			len = xprt->xpt_ops->xpo_recvfrom(rqstp);
-		rqstp->rq_stime = ktime_get();
 		rqstp->rq_reserved = serv->sv_max_mesg;
 		atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
 	} else
@@ -894,6 +900,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 	err = -EAGAIN;
 	if (len <= 0)
 		goto out_release;
+
 	trace_svc_xdr_recvfrom(&rqstp->rq_arg);
 
 	clear_bit(XPT_OLD, &xprt->xpt_flags);
@@ -902,6 +909,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 	if (serv->sv_stats)
 		serv->sv_stats->netcnt++;
 
+	rqstp->rq_stime = ktime_get();
 	return len;
 out_release:
 	rqstp->rq_res.len = 0;


@@ -826,12 +826,6 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
 
 	trace_sk_data_ready(sk);
 
-	if (svsk) {
-		/* Refer to svc_setup_socket() for details. */
-		rmb();
-		svsk->sk_odata(sk);
-	}
-
 	/*
 	 * This callback may called twice when a new connection
 	 * is established as a child socket inherits everything
@@ -840,13 +834,18 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
 	 * when one of child sockets become ESTABLISHED.
 	 * 2) data_ready method of the child socket may be called
	 * when it receives data before the socket is accepted.
-	 * In case of 2, we should ignore it silently.
+	 * In case of 2, we should ignore it silently and DO NOT
+	 * dereference svsk.
 	 */
-	if (sk->sk_state == TCP_LISTEN) {
-		if (svsk) {
-			set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
-			svc_xprt_enqueue(&svsk->sk_xprt);
-		}
+	if (sk->sk_state != TCP_LISTEN)
+		return;
+
+	if (svsk) {
+		/* Refer to svc_setup_socket() for details. */
+		rmb();
+		svsk->sk_odata(sk);
+		set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
+		svc_xprt_enqueue(&svsk->sk_xprt);
 	}
 }
@@ -887,13 +886,8 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
 	clear_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
 	err = kernel_accept(sock, &newsock, O_NONBLOCK);
 	if (err < 0) {
-		if (err == -ENOMEM)
-			printk(KERN_WARNING "%s: no more sockets!\n",
-			       serv->sv_name);
-		else if (err != -EAGAIN)
-			net_warn_ratelimited("%s: accept failed (err %d)!\n",
-					     serv->sv_name, -err);
-		trace_svcsock_accept_err(xprt, serv->sv_name, err);
+		if (err != -EAGAIN)
+			trace_svcsock_accept_err(xprt, serv->sv_name, err);
 		return NULL;
 	}
 	if (IS_ERR(sock_alloc_file(newsock, O_NONBLOCK, NULL)))
@@ -1464,7 +1458,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 	svsk->sk_owspace = inet->sk_write_space;
 	/*
 	 * This barrier is necessary in order to prevent race condition
-	 * with svc_data_ready(), svc_listen_data_ready() and others
+	 * with svc_data_ready(), svc_tcp_listen_data_ready(), and others
 	 * when calling callbacks above.
 	 */
 	wmb();
@@ -1476,7 +1470,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 	else
 		svc_tcp_init(svsk, serv);
 
-	trace_svcsock_new_socket(sock);
+	trace_svcsock_new(svsk, sock);
 	return svsk;
 }
@@ -1657,6 +1651,8 @@ static void svc_sock_free(struct svc_xprt *xprt)
 	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
 	struct socket *sock = svsk->sk_sock;
 
+	trace_svcsock_free(svsk, sock);
+
 	tls_handshake_cancel(sock->sk);
 	if (sock->file)
 		sockfd_put(sock);


@@ -1070,22 +1070,22 @@ __be32 * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes)
 }
 EXPORT_SYMBOL_GPL(xdr_reserve_space);
 
 /**
  * xdr_reserve_space_vec - Reserves a large amount of buffer space for sending
  * @xdr: pointer to xdr_stream
- * @vec: pointer to a kvec array
  * @nbytes: number of bytes to reserve
  *
- * Reserves enough buffer space to encode 'nbytes' of data and stores the
- * pointers in 'vec'. The size argument passed to xdr_reserve_space() is
- * determined based on the number of bytes remaining in the current page to
- * avoid invalidating iov_base pointers when xdr_commit_encode() is called.
+ * The size argument passed to xdr_reserve_space() is determined based
+ * on the number of bytes remaining in the current page to avoid
+ * invalidating iov_base pointers when xdr_commit_encode() is called.
+ *
+ * Return values:
+ *   %0: success
+ *   %-EMSGSIZE: not enough space is available in @xdr
  */
-int xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec, size_t nbytes)
+int xdr_reserve_space_vec(struct xdr_stream *xdr, size_t nbytes)
 {
-	int thislen;
-	int v = 0;
+	size_t thislen;
 	__be32 *p;
 
 	/*
@@ -1097,21 +1097,19 @@ int xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec, size_t nbyte
 		xdr->end = xdr->p;
 	}
 
-	/* XXX: Let's find a way to make this more efficient */
 	while (nbytes) {
 		thislen = xdr->buf->page_len % PAGE_SIZE;
 		thislen = min_t(size_t, nbytes, PAGE_SIZE - thislen);
 
 		p = xdr_reserve_space(xdr, thislen);
 		if (!p)
-			return -EIO;
+			return -EMSGSIZE;
 
-		vec[v].iov_base = p;
-		vec[v].iov_len = thislen;
-		v++;
 		nbytes -= thislen;
 	}
 
-	return v;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(xdr_reserve_space_vec);
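Because xdr_reserve_space_vec() no longer fills a kvec array, callers treat it as a plain success/failure reservation. The following caller sketch is illustrative only; the function name example_reserve_payload is hypothetical.

/* Illustrative only: reserve page space for a payload of @len bytes. */
static int example_reserve_payload(struct xdr_stream *xdr, size_t len)
{
	int ret = xdr_reserve_space_vec(xdr, len);

	if (ret < 0)
		return ret;	/* -EMSGSIZE: reply buffer too small */
	return 0;
}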


@@ -93,13 +93,7 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
 	 */
 	get_page(virt_to_page(rqst->rq_buffer));
 	sctxt->sc_send_wr.opcode = IB_WR_SEND;
-	ret = svc_rdma_send(rdma, sctxt);
-	if (ret < 0)
-		return ret;
-
-	ret = wait_for_completion_killable(&sctxt->sc_done);
-	svc_rdma_send_ctxt_put(rdma, sctxt);
-	return ret;
+	return svc_rdma_send(rdma, sctxt);
 }
 
 /* Server-side transport endpoint wants a whole page for its send


@@ -125,14 +125,15 @@ static void svc_rdma_recv_cid_init(struct svcxprt_rdma *rdma,
 static struct svc_rdma_recv_ctxt *
 svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
 {
+	int node = ibdev_to_node(rdma->sc_cm_id->device);
 	struct svc_rdma_recv_ctxt *ctxt;
 	dma_addr_t addr;
 	void *buffer;
 
-	ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL);
+	ctxt = kmalloc_node(sizeof(*ctxt), GFP_KERNEL, node);
 	if (!ctxt)
 		goto fail0;
-	buffer = kmalloc(rdma->sc_max_req_size, GFP_KERNEL);
+	buffer = kmalloc_node(rdma->sc_max_req_size, GFP_KERNEL, node);
 	if (!buffer)
 		goto fail1;
 	addr = ib_dma_map_single(rdma->sc_pd->device, buffer,
@@ -155,7 +156,6 @@ svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
 	ctxt->rc_recv_sge.length = rdma->sc_max_req_size;
 	ctxt->rc_recv_sge.lkey = rdma->sc_pd->local_dma_lkey;
 	ctxt->rc_recv_buf = buffer;
-	ctxt->rc_temp = false;
 	return ctxt;
 
 fail2:
@@ -232,10 +232,7 @@ void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
 	pcl_free(&ctxt->rc_write_pcl);
 	pcl_free(&ctxt->rc_reply_pcl);
 
-	if (!ctxt->rc_temp)
-		llist_add(&ctxt->rc_node, &rdma->sc_recv_ctxts);
-	else
-		svc_rdma_recv_ctxt_destroy(rdma, ctxt);
+	llist_add(&ctxt->rc_node, &rdma->sc_recv_ctxts);
 }
 
 /**
@@ -258,7 +255,7 @@ void svc_rdma_release_ctxt(struct svc_xprt *xprt, void *vctxt)
 }
 
 static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma,
-				   unsigned int wanted, bool temp)
+				   unsigned int wanted)
 {
 	const struct ib_recv_wr *bad_wr = NULL;
 	struct svc_rdma_recv_ctxt *ctxt;
@@ -275,7 +272,6 @@ static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma,
 			break;
 
 		trace_svcrdma_post_recv(ctxt);
-		ctxt->rc_temp = temp;
 		ctxt->rc_recv_wr.next = recv_chain;
 		recv_chain = &ctxt->rc_recv_wr;
 		rdma->sc_pending_recvs++;
@@ -309,7 +305,7 @@ err_free:
  */
 bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma)
 {
-	return svc_rdma_refresh_recvs(rdma, rdma->sc_max_requests, true);
+	return svc_rdma_refresh_recvs(rdma, rdma->sc_max_requests);
 }
 
 /**
@@ -343,7 +339,7 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
 	 * client reconnects.
 	 */
 	if (rdma->sc_pending_recvs < rdma->sc_max_requests)
-		if (!svc_rdma_refresh_recvs(rdma, rdma->sc_recv_batch, false))
+		if (!svc_rdma_refresh_recvs(rdma, rdma->sc_recv_batch))
 			goto dropped;
 
 	/* All wc fields are now known to be valid */
@@ -775,9 +771,6 @@ static bool svc_rdma_is_reverse_direction_reply(struct svc_xprt *xprt,
  *
 * The next ctxt is removed from the "receive" lists.
 *
- * - If the ctxt completes a Read, then finish assembling the Call
- *   message and return the number of bytes in the message.
- *
 * - If the ctxt completes a Receive, then construct the Call
 *   message from the contents of the Receive buffer.
 *
@@ -786,7 +779,8 @@ static bool svc_rdma_is_reverse_direction_reply(struct svc_xprt *xprt,
 *   in the message.
 *
 * - If there are Read chunks in this message, post Read WRs to
- *   pull that payload and return 0.
+ *   pull that payload. When the Read WRs complete, build the
+ *   full message and return the number of bytes in it.
 */
 int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 {
@@ -796,6 +790,12 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 	struct svc_rdma_recv_ctxt *ctxt;
 	int ret;
 
+	/* Prevent svc_xprt_release() from releasing pages in rq_pages
+	 * when returning 0 or an error.
+	 */
+	rqstp->rq_respages = rqstp->rq_pages;
+	rqstp->rq_next_page = rqstp->rq_respages;
+
 	rqstp->rq_xprt_ctxt = NULL;
 
 	ctxt = NULL;
@@ -819,12 +819,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 			DMA_FROM_DEVICE);
 	svc_rdma_build_arg_xdr(rqstp, ctxt);
 
-	/* Prevent svc_xprt_release from releasing pages in rq_pages
-	 * if we return 0 or an error.
-	 */
-	rqstp->rq_respages = rqstp->rq_pages;
-	rqstp->rq_next_page = rqstp->rq_respages;
-
 	ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg, ctxt);
 	if (ret < 0)
 		goto out_err;


@@ -62,8 +62,8 @@ svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges)
 	if (node) {
 		ctxt = llist_entry(node, struct svc_rdma_rw_ctxt, rw_node);
 	} else {
-		ctxt = kmalloc(struct_size(ctxt, rw_first_sgl, SG_CHUNK_SIZE),
-			       GFP_KERNEL);
+		ctxt = kmalloc_node(struct_size(ctxt, rw_first_sgl, SG_CHUNK_SIZE),
+				    GFP_KERNEL, ibdev_to_node(rdma->sc_cm_id->device));
 		if (!ctxt)
 			goto out_noctx;
@@ -84,8 +84,7 @@ out_noctx:
 	return NULL;
 }
 
-static void __svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
-				   struct svc_rdma_rw_ctxt *ctxt,
+static void __svc_rdma_put_rw_ctxt(struct svc_rdma_rw_ctxt *ctxt,
 				   struct llist_head *list)
 {
 	sg_free_table_chained(&ctxt->rw_sg_table, SG_CHUNK_SIZE);
@@ -95,7 +94,7 @@ static void __svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
 static void svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
 				 struct svc_rdma_rw_ctxt *ctxt)
 {
-	__svc_rdma_put_rw_ctxt(rdma, ctxt, &rdma->sc_rw_ctxts);
+	__svc_rdma_put_rw_ctxt(ctxt, &rdma->sc_rw_ctxts);
 }
 
 /**
@@ -191,6 +190,8 @@ static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc,
 	struct svc_rdma_rw_ctxt *ctxt;
 	LLIST_HEAD(free);
 
+	trace_svcrdma_cc_release(&cc->cc_cid, cc->cc_sqecount);
+
 	first = last = NULL;
 	while ((ctxt = svc_rdma_next_ctxt(&cc->cc_rwctxts)) != NULL) {
 		list_del(&ctxt->rw_list);
@@ -198,7 +199,7 @@ static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc,
 		rdma_rw_ctx_destroy(&ctxt->rw_ctx, rdma->sc_qp,
 				    rdma->sc_port_num, ctxt->rw_sg_table.sgl,
 				    ctxt->rw_nents, dir);
-		__svc_rdma_put_rw_ctxt(rdma, ctxt, &free);
+		__svc_rdma_put_rw_ctxt(ctxt, &free);
 
 		ctxt->rw_node.next = first;
 		first = &ctxt->rw_node;
@@ -234,7 +235,8 @@ svc_rdma_write_info_alloc(struct svcxprt_rdma *rdma,
 {
 	struct svc_rdma_write_info *info;
 
-	info = kmalloc(sizeof(*info), GFP_KERNEL);
+	info = kmalloc_node(sizeof(*info), GFP_KERNEL,
+			    ibdev_to_node(rdma->sc_cm_id->device));
 	if (!info)
 		return info;
@@ -304,7 +306,8 @@ svc_rdma_read_info_alloc(struct svcxprt_rdma *rdma)
 {
 	struct svc_rdma_read_info *info;
 
-	info = kmalloc(sizeof(*info), GFP_KERNEL);
+	info = kmalloc_node(sizeof(*info), GFP_KERNEL,
+			    ibdev_to_node(rdma->sc_cm_id->device));
 	if (!info)
 		return info;
@@ -351,8 +354,7 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc)
 	return;
 }
 
-/* This function sleeps when the transport's Send Queue is congested.
- *
+/*
 * Assumptions:
 * - If ib_post_send() succeeds, only one completion is expected,
 *   even if one or more WRs are flushed. This is true when posting
@@ -367,6 +369,8 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
 	struct ib_cqe *cqe;
 	int ret;
 
+	might_sleep();
+
 	if (cc->cc_sqecount > rdma->sc_sq_depth)
 		return -EINVAL;


@@ -123,18 +123,17 @@ static void svc_rdma_send_cid_init(struct svcxprt_rdma *rdma,
 static struct svc_rdma_send_ctxt *
 svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
 {
+	int node = ibdev_to_node(rdma->sc_cm_id->device);
 	struct svc_rdma_send_ctxt *ctxt;
 	dma_addr_t addr;
 	void *buffer;
-	size_t size;
 	int i;
 
-	size = sizeof(*ctxt);
-	size += rdma->sc_max_send_sges * sizeof(struct ib_sge);
-	ctxt = kmalloc(size, GFP_KERNEL);
+	ctxt = kmalloc_node(struct_size(ctxt, sc_sges, rdma->sc_max_send_sges),
+			    GFP_KERNEL, node);
 	if (!ctxt)
 		goto fail0;
-	buffer = kmalloc(rdma->sc_max_req_size, GFP_KERNEL);
+	buffer = kmalloc_node(rdma->sc_max_req_size, GFP_KERNEL, node);
 	if (!buffer)
 		goto fail1;
 	addr = ib_dma_map_single(rdma->sc_pd->device, buffer,
@@ -148,7 +147,6 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
 	ctxt->sc_send_wr.wr_cqe = &ctxt->sc_cqe;
 	ctxt->sc_send_wr.sg_list = ctxt->sc_sges;
 	ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED;
-	init_completion(&ctxt->sc_done);
 	ctxt->sc_cqe.done = svc_rdma_wc_send;
 	ctxt->sc_xprt_buf = buffer;
 	xdr_buf_init(&ctxt->sc_hdrbuf, ctxt->sc_xprt_buf,
@@ -214,6 +212,7 @@ out:
 	ctxt->sc_send_wr.num_sge = 0;
 	ctxt->sc_cur_sge_no = 0;
+	ctxt->sc_page_count = 0;
 	return ctxt;
 
 out_empty:
@@ -228,6 +227,8 @@ out_empty:
 * svc_rdma_send_ctxt_put - Return send_ctxt to free list
 * @rdma: controlling svcxprt_rdma
 * @ctxt: object to return to the free list
+ *
+ * Pages left in sc_pages are DMA unmapped and released.
 */
 void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
 			    struct svc_rdma_send_ctxt *ctxt)
@@ -235,6 +236,9 @@ void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
 	struct ib_device *device = rdma->sc_cm_id->device;
 	unsigned int i;
 
+	if (ctxt->sc_page_count)
+		release_pages(ctxt->sc_pages, ctxt->sc_page_count);
+
 	/* The first SGE contains the transport header, which
 	 * remains mapped until @ctxt is destroyed.
 	 */
@@ -281,12 +285,12 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
 		container_of(cqe, struct svc_rdma_send_ctxt, sc_cqe);
 
 	svc_rdma_wake_send_waiters(rdma, 1);
-	complete(&ctxt->sc_done);
 
 	if (unlikely(wc->status != IB_WC_SUCCESS))
 		goto flushed;
 
 	trace_svcrdma_wc_send(wc, &ctxt->sc_cid);
+	svc_rdma_send_ctxt_put(rdma, ctxt);
 	return;
 
 flushed:
@@ -294,6 +298,7 @@ flushed:
 		trace_svcrdma_wc_send_err(wc, &ctxt->sc_cid);
 	else
 		trace_svcrdma_wc_send_flush(wc, &ctxt->sc_cid);
+	svc_rdma_send_ctxt_put(rdma, ctxt);
 	svc_xprt_deferred_close(&rdma->sc_xprt);
 }
 
@@ -310,7 +315,7 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt)
 	struct ib_send_wr *wr = &ctxt->sc_send_wr;
 	int ret;
 
-	reinit_completion(&ctxt->sc_done);
+	might_sleep();
 
 	/* Sync the transport header buffer */
 	ib_dma_sync_single_for_device(rdma->sc_pd->device,
@@ -799,6 +804,25 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
 				     svc_rdma_xb_dma_map, &args);
 }
 
+/* The svc_rqst and all resources it owns are released as soon as
+ * svc_rdma_sendto returns. Transfer pages under I/O to the ctxt
+ * so they are released by the Send completion handler.
+ */
+static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
+				   struct svc_rdma_send_ctxt *ctxt)
+{
+	int i, pages = rqstp->rq_next_page - rqstp->rq_respages;
+
+	ctxt->sc_page_count += pages;
+	for (i = 0; i < pages; i++) {
+		ctxt->sc_pages[i] = rqstp->rq_respages[i];
+		rqstp->rq_respages[i] = NULL;
+	}
+
+	/* Prevent svc_xprt_release from releasing pages in rq_pages */
+	rqstp->rq_next_page = rqstp->rq_respages;
+}
+
 /* Prepare the portion of the RPC Reply that will be transmitted
 * via RDMA Send. The RPC-over-RDMA transport header is prepared
 * in sc_sges[0], and the RPC xdr_buf is prepared in following sges.
@@ -828,6 +852,8 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
 	if (ret < 0)
 		return ret;
 
+	svc_rdma_save_io_pages(rqstp, sctxt);
+
 	if (rctxt->rc_inv_rkey) {
 		sctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV;
 		sctxt->sc_send_wr.ex.invalidate_rkey = rctxt->rc_inv_rkey;
@@ -835,13 +861,7 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
 		sctxt->sc_send_wr.opcode = IB_WR_SEND;
 	}
 
-	ret = svc_rdma_send(rdma, sctxt);
-	if (ret < 0)
-		return ret;
-
-	ret = wait_for_completion_killable(&sctxt->sc_done);
-	svc_rdma_send_ctxt_put(rdma, sctxt);
-	return ret;
+	return svc_rdma_send(rdma, sctxt);
 }
 
 /**
@@ -907,8 +927,7 @@ void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
 	sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len;
 	if (svc_rdma_send(rdma, sctxt))
 		goto put_ctxt;
-
-	wait_for_completion_killable(&sctxt->sc_done);
+	return;
 
 put_ctxt:
 	svc_rdma_send_ctxt_put(rdma, sctxt);
@@ -976,17 +995,16 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 	ret = svc_rdma_send_reply_msg(rdma, sctxt, rctxt, rqstp);
 	if (ret < 0)
 		goto put_ctxt;
-
-	/* Prevent svc_xprt_release() from releasing the page backing
-	 * rq_res.head[0].iov_base. It's no longer being accessed by
-	 * the I/O device. */
-	rqstp->rq_respages++;
 	return 0;
 
 reply_chunk:
 	if (ret != -E2BIG && ret != -EINVAL)
 		goto put_ctxt;
 
+	/* Send completion releases payload pages that were part
+	 * of previously posted RDMA Writes.
+	 */
+	svc_rdma_save_io_pages(rqstp, sctxt);
 	svc_rdma_send_error_msg(rdma, sctxt, rctxt, ret);
 	return 0;


@@ -64,7 +64,7 @@
 #define RPCDBG_FACILITY	RPCDBG_SVCXPRT
 
 static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
-						 struct net *net);
+						 struct net *net, int node);
 static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
 					struct net *net,
 					struct sockaddr *sa, int salen,
@@ -123,14 +123,14 @@ static void qp_event_handler(struct ib_event *event, void *context)
 }
 
 static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
-						 struct net *net)
+						 struct net *net, int node)
 {
-	struct svcxprt_rdma *cma_xprt = kzalloc(sizeof *cma_xprt, GFP_KERNEL);
+	struct svcxprt_rdma *cma_xprt;
 
-	if (!cma_xprt) {
-		dprintk("svcrdma: failed to create new transport\n");
+	cma_xprt = kzalloc_node(sizeof(*cma_xprt), GFP_KERNEL, node);
+	if (!cma_xprt)
 		return NULL;
-	}
+
 	svc_xprt_init(net, &svc_rdma_class, &cma_xprt->sc_xprt, serv);
 	INIT_LIST_HEAD(&cma_xprt->sc_accept_q);
 	INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
@@ -193,9 +193,9 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id,
 	struct svcxprt_rdma *newxprt;
 	struct sockaddr *sa;
 
-	/* Create a new transport */
 	newxprt = svc_rdma_create_xprt(listen_xprt->sc_xprt.xpt_server,
-				       listen_xprt->sc_xprt.xpt_net);
+				       listen_xprt->sc_xprt.xpt_net,
+				       ibdev_to_node(new_cma_id->device));
 	if (!newxprt)
 		return;
 	newxprt->sc_cm_id = new_cma_id;
@@ -304,7 +304,7 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
 	if (sa->sa_family != AF_INET && sa->sa_family != AF_INET6)
 		return ERR_PTR(-EAFNOSUPPORT);
 
-	cma_xprt = svc_rdma_create_xprt(serv, net);
+	cma_xprt = svc_rdma_create_xprt(serv, net, NUMA_NO_NODE);
 	if (!cma_xprt)
 		return ERR_PTR(-ENOMEM);
 	set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);