NFSD 6.5 Release Notes

Fixes and clean-ups include:
 - Clean-ups in the READ path in anticipation of MSG_SPLICE_PAGES
 - Better NUMA awareness when allocating pages and other objects
 - A number of minor clean-ups to XDR encoding
 - Elimination of a race when accepting a TCP socket
 - Numerous observability enhancements
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEKLLlsBKG3yQ88j7+M2qzM29mf5cFAmSVpEQACgkQM2qzM29m
 f5flNQ/+N+GKwJWKkDDK9l0/OJWLSOV1DaUKs04FW79Xa05bqB9W3mnkQ2QzXmBq
 9BUUeAnn0bD+yDgsBd4+l5HMjK91Qm+e+ljE4Gn+hA2Kg5VsBPbNImXQTuAdC/mw
 2YWNqaQ6EjHWmzauAOiKqKwDsYefZaiS3+1CEuOlmXeDK979yU7zXHUbFvLjmNiP
 5ATDrxfIsQza+Je0sh3JGeJtFZCt127zOd6vEQCiedbC8yy7n1ldWi/OGWPsg/2H
 z/QWbs9Iw8ExESosBKfX6M13izJV9eZ69ZfvxmLFGcfZmHoOCOtIXzDDqmD6JsPY
 l4SBtWkP+OB4jePM1nEpFU65EQrGtRK/roGGXNtCTaZAcDBk2/jJ9b5gejkSDsaU
 B3alN0UwECeEzQ3whYYAGy4m1FPaQcFfxl1RydcvYEfRiuiYxZb/3EO62dDavv14
 WTYUGHNxso48DGngyO2xRBIda0Kwqc4vgkzOww6SP+Eok/22q/CsKHiuzRgnrLq6
 GYdRX0Zmvl/0lzuBsDOzpOIQg2DuGdc84fjUCxqQu0DBSkehWo4i2eGNHQuHbMUb
 Tl58/tYiFazVOX1aAKabzKm2iaCjKdLySDhTj+dSgTXKx6SqZl1cXvhFl9xImgYj
 uW9i+ALZO9PNvtpMIvGaHqQK6xyI9F6isgKyNpXwy9J5D6ck7GA=
 =1EQc
 -----END PGP SIGNATURE-----

Merge tag 'nfsd-6.5' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux

Pull nfsd updates from Chuck Lever:

 - Clean-ups in the READ path in anticipation of MSG_SPLICE_PAGES

 - Better NUMA awareness when allocating pages and other objects

 - A number of minor clean-ups to XDR encoding

 - Elimination of a race when accepting a TCP socket

 - Numerous observability enhancements

* tag 'nfsd-6.5' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux: (46 commits)
  nfsd: remove redundant assignments to variable len
  svcrdma: Fix stale comment
  NFSD: Distinguish per-net namespace initialization
  nfsd: move init of percpu reply_cache_stats counters back to nfsd_init_net
  SUNRPC: Address RCU warning in net/sunrpc/svc.c
  SUNRPC: Use sysfs_emit in place of strlcpy/sprintf
  SUNRPC: Remove transport class dprintk call sites
  SUNRPC: Fix comments for transport class registration
  svcrdma: Remove an unused argument from __svc_rdma_put_rw_ctxt()
  svcrdma: trace cc_release calls
  svcrdma: Convert "might sleep" comment into a code annotation
  NFSD: Add an nfsd4_encode_nfstime4() helper
  SUNRPC: Move initialization of rq_stime
  SUNRPC: Optimize page release in svc_rdma_sendto()
  svcrdma: Prevent page release when nothing was received
  svcrdma: Revert 2a1e4f21d8 ("svcrdma: Normalize Send page handling")
  SUNRPC: Revert 579900670a ("svcrdma: Remove unused sc_pages field")
  SUNRPC: Revert cc93ce9529 ("svcrdma: Retain the page backing rq_res.head[0].iov_base")
  NFSD: add encoding of op_recall flag for write delegation
  NFSD: Add "official" reviewers for this subsystem
  ...
This commit is contained in:
Linus Torvalds 2023-06-26 10:48:57 -07:00
commit f7976a6493
31 changed files with 802 additions and 435 deletions

View File

@ -183,6 +183,8 @@ Henrik Rydberg <rydberg@bitmath.org>
Herbert Xu <herbert@gondor.apana.org.au>
Huacai Chen <chenhuacai@kernel.org> <chenhc@lemote.com>
Huacai Chen <chenhuacai@kernel.org> <chenhuacai@loongson.cn>
J. Bruce Fields <bfields@fieldses.org> <bfields@redhat.com>
J. Bruce Fields <bfields@fieldses.org> <bfields@citi.umich.edu>
Jacob Shin <Jacob.Shin@amd.com>
Jaegeuk Kim <jaegeuk@kernel.org> <jaegeuk@google.com>
Jaegeuk Kim <jaegeuk@kernel.org> <jaegeuk.kim@samsung.com>

View File

@ -11275,6 +11275,10 @@ W: http://kernelnewbies.org/KernelJanitors
KERNEL NFSD, SUNRPC, AND LOCKD SERVERS
M: Chuck Lever <chuck.lever@oracle.com>
M: Jeff Layton <jlayton@kernel.org>
R: Neil Brown <neilb@suse.de>
R: Olga Kornievskaia <kolga@netapp.com>
R: Dai Ngo <Dai.Ngo@oracle.com>
R: Tom Talpey <tom@talpey.com>
L: linux-nfs@vger.kernel.org
S: Supported
W: http://nfs.sourceforge.net/

View File

@ -355,7 +355,6 @@ static int lockd_get(void)
int error;
if (nlmsvc_serv) {
svc_get(nlmsvc_serv);
nlmsvc_users++;
return 0;
}

View File

@ -80,6 +80,8 @@ enum {
int nfsd_drc_slab_create(void);
void nfsd_drc_slab_free(void);
int nfsd_net_reply_cache_init(struct nfsd_net *nn);
void nfsd_net_reply_cache_destroy(struct nfsd_net *nn);
int nfsd_reply_cache_init(struct nfsd_net *);
void nfsd_reply_cache_shutdown(struct nfsd_net *);
int nfsd_cache_lookup(struct svc_rqst *);

View File

@ -97,7 +97,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
goto out;
err = -EINVAL;
if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
if (qword_get(&mesg, buf, PAGE_SIZE) <= 0)
goto out;
err = -ENOENT;
@ -107,7 +107,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
dprintk("found domain %s\n", buf);
err = -EINVAL;
if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
if (qword_get(&mesg, buf, PAGE_SIZE) <= 0)
goto out;
fsidtype = simple_strtoul(buf, &ep, 10);
if (*ep)
@ -593,7 +593,6 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
{
/* client path expiry [flags anonuid anongid fsid] */
char *buf;
int len;
int err;
struct auth_domain *dom = NULL;
struct svc_export exp = {}, *expp;
@ -609,8 +608,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
/* client */
err = -EINVAL;
len = qword_get(&mesg, buf, PAGE_SIZE);
if (len <= 0)
if (qword_get(&mesg, buf, PAGE_SIZE) <= 0)
goto out;
err = -ENOENT;
@ -620,7 +618,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
/* path */
err = -EINVAL;
if ((len = qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
if (qword_get(&mesg, buf, PAGE_SIZE) <= 0)
goto out1;
err = kern_path(buf, 0, &exp.ex_path);
@ -665,7 +663,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
goto out3;
exp.ex_fsid = an_int;
while ((len = qword_get(&mesg, buf, PAGE_SIZE)) > 0) {
while (qword_get(&mesg, buf, PAGE_SIZE) > 0) {
if (strcmp(buf, "fsloc") == 0)
err = fsloc_parse(&mesg, buf, &exp.ex_fslocs);
else if (strcmp(buf, "uuid") == 0)

View File

@ -151,8 +151,6 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
{
struct nfsd3_readargs *argp = rqstp->rq_argp;
struct nfsd3_readres *resp = rqstp->rq_resp;
unsigned int len;
int v;
dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n",
SVCFH_fmt(&argp->fh),
@ -166,17 +164,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
if (argp->offset + argp->count > (u64)OFFSET_MAX)
argp->count = (u64)OFFSET_MAX - argp->offset;
v = 0;
len = argp->count;
resp->pages = rqstp->rq_next_page;
while (len > 0) {
struct page *page = *(rqstp->rq_next_page++);
rqstp->rq_vec[v].iov_base = page_address(page);
rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE);
len -= rqstp->rq_vec[v].iov_len;
v++;
}
/* Obtain buffer pointer for payload.
* 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof)
@ -187,7 +175,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
fh_copy(&resp->fh, &argp->fh);
resp->status = nfsd_read(rqstp, &resp->fh, argp->offset,
rqstp->rq_vec, v, &resp->count, &resp->eof);
&resp->count, &resp->eof);
return rpc_success;
}

View File

@ -828,7 +828,8 @@ nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
return false;
if (xdr_stream_encode_u32(xdr, resp->len) < 0)
return false;
xdr_write_pages(xdr, resp->pages, 0, resp->len);
svcxdr_encode_opaque_pages(rqstp, xdr, resp->pages, 0,
resp->len);
if (svc_encode_result_payload(rqstp, head->iov_len, resp->len) < 0)
return false;
break;
@ -859,7 +860,8 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
return false;
if (xdr_stream_encode_u32(xdr, resp->count) < 0)
return false;
xdr_write_pages(xdr, resp->pages, rqstp->rq_res.page_base,
svcxdr_encode_opaque_pages(rqstp, xdr, resp->pages,
rqstp->rq_res.page_base,
resp->count);
if (svc_encode_result_payload(rqstp, head->iov_len, resp->count) < 0)
return false;
@ -961,7 +963,8 @@ nfs3svc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
return false;
if (!svcxdr_encode_cookieverf3(xdr, resp->verf))
return false;
xdr_write_pages(xdr, dirlist->pages, 0, dirlist->len);
svcxdr_encode_opaque_pages(rqstp, xdr, dirlist->pages, 0,
dirlist->len);
/* no more entries */
if (xdr_stream_encode_item_absent(xdr) < 0)
return false;

View File

@ -2541,6 +2541,20 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode,
return p;
}
static __be32 nfsd4_encode_nfstime4(struct xdr_stream *xdr,
struct timespec64 *tv)
{
__be32 *p;
p = xdr_reserve_space(xdr, XDR_UNIT * 3);
if (!p)
return nfserr_resource;
p = xdr_encode_hyper(p, (s64)tv->tv_sec);
*p = cpu_to_be32(tv->tv_nsec);
return nfs_ok;
}
/*
* ctime (in NFSv4, time_metadata) is not writeable, and the client
* doesn't really care what resolution could theoretically be stored by
@ -2566,12 +2580,16 @@ static __be32 *encode_time_delta(__be32 *p, struct inode *inode)
return p;
}
static __be32 *encode_cinfo(__be32 *p, struct nfsd4_change_info *c)
static __be32
nfsd4_encode_change_info4(struct xdr_stream *xdr, struct nfsd4_change_info *c)
{
*p++ = cpu_to_be32(c->atomic);
p = xdr_encode_hyper(p, c->before_change);
p = xdr_encode_hyper(p, c->after_change);
return p;
if (xdr_stream_encode_bool(xdr, c->atomic) < 0)
return nfserr_resource;
if (xdr_stream_encode_u64(xdr, c->before_change) < 0)
return nfserr_resource;
if (xdr_stream_encode_u64(xdr, c->after_change) < 0)
return nfserr_resource;
return nfs_ok;
}
/* Encode as an array of strings the string given with components
@ -3348,11 +3366,9 @@ out_acl:
p = xdr_encode_hyper(p, dummy64);
}
if (bmval1 & FATTR4_WORD1_TIME_ACCESS) {
p = xdr_reserve_space(xdr, 12);
if (!p)
goto out_resource;
p = xdr_encode_hyper(p, (s64)stat.atime.tv_sec);
*p++ = cpu_to_be32(stat.atime.tv_nsec);
status = nfsd4_encode_nfstime4(xdr, &stat.atime);
if (status)
goto out;
}
if (bmval1 & FATTR4_WORD1_TIME_DELTA) {
p = xdr_reserve_space(xdr, 12);
@ -3361,25 +3377,19 @@ out_acl:
p = encode_time_delta(p, d_inode(dentry));
}
if (bmval1 & FATTR4_WORD1_TIME_METADATA) {
p = xdr_reserve_space(xdr, 12);
if (!p)
goto out_resource;
p = xdr_encode_hyper(p, (s64)stat.ctime.tv_sec);
*p++ = cpu_to_be32(stat.ctime.tv_nsec);
status = nfsd4_encode_nfstime4(xdr, &stat.ctime);
if (status)
goto out;
}
if (bmval1 & FATTR4_WORD1_TIME_MODIFY) {
p = xdr_reserve_space(xdr, 12);
if (!p)
goto out_resource;
p = xdr_encode_hyper(p, (s64)stat.mtime.tv_sec);
*p++ = cpu_to_be32(stat.mtime.tv_nsec);
status = nfsd4_encode_nfstime4(xdr, &stat.mtime);
if (status)
goto out;
}
if (bmval1 & FATTR4_WORD1_TIME_CREATE) {
p = xdr_reserve_space(xdr, 12);
if (!p)
goto out_resource;
p = xdr_encode_hyper(p, (s64)stat.btime.tv_sec);
*p++ = cpu_to_be32(stat.btime.tv_nsec);
status = nfsd4_encode_nfstime4(xdr, &stat.btime);
if (status)
goto out;
}
if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) {
u64 ino = stat.ino;
@ -3688,6 +3698,30 @@ fail:
return -EINVAL;
}
static __be32
nfsd4_encode_verifier4(struct xdr_stream *xdr, const nfs4_verifier *verf)
{
__be32 *p;
p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE);
if (!p)
return nfserr_resource;
memcpy(p, verf->data, sizeof(verf->data));
return nfs_ok;
}
static __be32
nfsd4_encode_clientid4(struct xdr_stream *xdr, const clientid_t *clientid)
{
__be32 *p;
p = xdr_reserve_space(xdr, sizeof(__be64));
if (!p)
return nfserr_resource;
memcpy(p, clientid, sizeof(*clientid));
return nfs_ok;
}
static __be32
nfsd4_encode_stateid(struct xdr_stream *xdr, stateid_t *sid)
{
@ -3752,15 +3786,8 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr,
union nfsd4_op_u *u)
{
struct nfsd4_commit *commit = &u->commit;
struct xdr_stream *xdr = resp->xdr;
__be32 *p;
p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE);
if (!p)
return nfserr_resource;
p = xdr_encode_opaque_fixed(p, commit->co_verf.data,
NFS4_VERIFIER_SIZE);
return 0;
return nfsd4_encode_verifier4(resp->xdr, &commit->co_verf);
}
static __be32
@ -3769,12 +3796,10 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr,
{
struct nfsd4_create *create = &u->create;
struct xdr_stream *xdr = resp->xdr;
__be32 *p;
p = xdr_reserve_space(xdr, 20);
if (!p)
return nfserr_resource;
encode_cinfo(p, &create->cr_cinfo);
nfserr = nfsd4_encode_change_info4(xdr, &create->cr_cinfo);
if (nfserr)
return nfserr;
return nfsd4_encode_bitmap(xdr, create->cr_bmval[0],
create->cr_bmval[1], create->cr_bmval[2]);
}
@ -3892,13 +3917,8 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr,
{
struct nfsd4_link *link = &u->link;
struct xdr_stream *xdr = resp->xdr;
__be32 *p;
p = xdr_reserve_space(xdr, 20);
if (!p)
return nfserr_resource;
p = encode_cinfo(p, &link->li_cinfo);
return 0;
return nfsd4_encode_change_info4(xdr, &link->li_cinfo);
}
@ -3913,11 +3933,11 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr,
nfserr = nfsd4_encode_stateid(xdr, &open->op_stateid);
if (nfserr)
return nfserr;
p = xdr_reserve_space(xdr, 24);
if (!p)
nfserr = nfsd4_encode_change_info4(xdr, &open->op_cinfo);
if (nfserr)
return nfserr;
if (xdr_stream_encode_u32(xdr, open->op_rflags) < 0)
return nfserr_resource;
p = encode_cinfo(p, &open->op_cinfo);
*p++ = cpu_to_be32(open->op_rflags);
nfserr = nfsd4_encode_bitmap(xdr, open->op_bmval[0], open->op_bmval[1],
open->op_bmval[2]);
@ -3956,7 +3976,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr,
p = xdr_reserve_space(xdr, 32);
if (!p)
return nfserr_resource;
*p++ = cpu_to_be32(0);
*p++ = cpu_to_be32(open->op_recall);
/*
* TODO: space_limit's in delegations
@ -4018,6 +4038,11 @@ nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr,
return nfsd4_encode_stateid(xdr, &od->od_stateid);
}
/*
* The operation of this function assumes that this is the only
* READ operation in the COMPOUND. If there are multiple READs,
* we use nfsd4_encode_readv().
*/
static __be32 nfsd4_encode_splice_read(
struct nfsd4_compoundres *resp,
struct nfsd4_read *read,
@ -4028,8 +4053,12 @@ static __be32 nfsd4_encode_splice_read(
int status, space_left;
__be32 nfserr;
/* Make sure there will be room for padding if needed */
if (xdr->end - xdr->p < 1)
/*
* Make sure there is room at the end of buf->head for
* svcxdr_encode_opaque_pages() to create a tail buffer
* to XDR-pad the payload.
*/
if (xdr->iov != xdr->buf->head || xdr->end - xdr->p < 1)
return nfserr_resource;
nfserr = nfsd_splice_read(read->rd_rqstp, read->rd_fhp,
@ -4038,6 +4067,8 @@ static __be32 nfsd4_encode_splice_read(
read->rd_length = maxcount;
if (nfserr)
goto out_err;
svcxdr_encode_opaque_pages(read->rd_rqstp, xdr, buf->pages,
buf->page_base, maxcount);
status = svc_encode_result_payload(read->rd_rqstp,
buf->head[0].iov_len, maxcount);
if (status) {
@ -4045,31 +4076,19 @@ static __be32 nfsd4_encode_splice_read(
goto out_err;
}
buf->page_len = maxcount;
buf->len += maxcount;
xdr->page_ptr += (buf->page_base + maxcount + PAGE_SIZE - 1)
/ PAGE_SIZE;
/* Use rest of head for padding and remaining ops: */
buf->tail[0].iov_base = xdr->p;
buf->tail[0].iov_len = 0;
xdr->iov = buf->tail;
if (maxcount&3) {
int pad = 4 - (maxcount&3);
*(xdr->p++) = 0;
buf->tail[0].iov_base += maxcount&3;
buf->tail[0].iov_len = pad;
buf->len += pad;
}
/*
* Prepare to encode subsequent operations.
*
* xdr_truncate_encode() is not safe to use after a successful
* splice read has been done, so the following stream
* manipulations are open-coded.
*/
space_left = min_t(int, (void *)xdr->end - (void *)xdr->p,
buf->buflen - buf->len);
buf->buflen = buf->len + space_left;
xdr->end = (__be32 *)((void *)xdr->end + space_left);
return 0;
return nfs_ok;
out_err:
/*
@ -4090,12 +4109,12 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
__be32 zero = xdr_zero;
__be32 nfserr;
read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, maxcount);
if (read->rd_vlen < 0)
if (xdr_reserve_space_vec(xdr, maxcount) < 0)
return nfserr_resource;
nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
resp->rqstp->rq_vec, read->rd_vlen, &maxcount,
nfserr = nfsd_iter_read(resp->rqstp, read->rd_fhp, file,
read->rd_offset, &maxcount,
xdr->buf->page_len & ~PAGE_MASK,
&read->rd_eof);
read->rd_length = maxcount;
if (nfserr)
@ -4213,15 +4232,9 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr,
int starting_len = xdr->buf->len;
__be32 *p;
p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE);
if (!p)
return nfserr_resource;
/* XXX: Following NFSv3, we ignore the READDIR verifier for now. */
*p++ = cpu_to_be32(0);
*p++ = cpu_to_be32(0);
xdr->buf->head[0].iov_len = (char *)xdr->p -
(char *)xdr->buf->head[0].iov_base;
nfserr = nfsd4_encode_verifier4(xdr, &readdir->rd_verf);
if (nfserr != nfs_ok)
return nfserr;
/*
* Number of bytes left for directory entries allowing for the
@ -4299,13 +4312,8 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr,
{
struct nfsd4_remove *remove = &u->remove;
struct xdr_stream *xdr = resp->xdr;
__be32 *p;
p = xdr_reserve_space(xdr, 20);
if (!p)
return nfserr_resource;
p = encode_cinfo(p, &remove->rm_cinfo);
return 0;
return nfsd4_encode_change_info4(xdr, &remove->rm_cinfo);
}
static __be32
@ -4314,14 +4322,11 @@ nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr,
{
struct nfsd4_rename *rename = &u->rename;
struct xdr_stream *xdr = resp->xdr;
__be32 *p;
p = xdr_reserve_space(xdr, 40);
if (!p)
return nfserr_resource;
p = encode_cinfo(p, &rename->rn_sinfo);
p = encode_cinfo(p, &rename->rn_tinfo);
return 0;
nfserr = nfsd4_encode_change_info4(xdr, &rename->rn_sinfo);
if (nfserr)
return nfserr;
return nfsd4_encode_change_info4(xdr, &rename->rn_tinfo);
}
static __be32
@ -4448,23 +4453,25 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr,
{
struct nfsd4_setclientid *scd = &u->setclientid;
struct xdr_stream *xdr = resp->xdr;
__be32 *p;
if (!nfserr) {
p = xdr_reserve_space(xdr, 8 + NFS4_VERIFIER_SIZE);
if (!p)
return nfserr_resource;
p = xdr_encode_opaque_fixed(p, &scd->se_clientid, 8);
p = xdr_encode_opaque_fixed(p, &scd->se_confirm,
NFS4_VERIFIER_SIZE);
nfserr = nfsd4_encode_clientid4(xdr, &scd->se_clientid);
if (nfserr != nfs_ok)
goto out;
nfserr = nfsd4_encode_verifier4(xdr, &scd->se_confirm);
} else if (nfserr == nfserr_clid_inuse) {
/* empty network id */
if (xdr_stream_encode_u32(xdr, 0) < 0) {
nfserr = nfserr_resource;
goto out;
}
else if (nfserr == nfserr_clid_inuse) {
p = xdr_reserve_space(xdr, 8);
if (!p)
return nfserr_resource;
*p++ = cpu_to_be32(0);
*p++ = cpu_to_be32(0);
/* empty universal address */
if (xdr_stream_encode_u32(xdr, 0) < 0) {
nfserr = nfserr_resource;
goto out;
}
}
out:
return nfserr;
}
@ -4473,17 +4480,12 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr,
union nfsd4_op_u *u)
{
struct nfsd4_write *write = &u->write;
struct xdr_stream *xdr = resp->xdr;
__be32 *p;
p = xdr_reserve_space(xdr, 16);
if (!p)
if (xdr_stream_encode_u32(resp->xdr, write->wr_bytes_written) < 0)
return nfserr_resource;
*p++ = cpu_to_be32(write->wr_bytes_written);
*p++ = cpu_to_be32(write->wr_how_written);
p = xdr_encode_opaque_fixed(p, write->wr_verifier.data,
NFS4_VERIFIER_SIZE);
return 0;
if (xdr_stream_encode_u32(resp->xdr, write->wr_how_written) < 0)
return nfserr_resource;
return nfsd4_encode_verifier4(resp->xdr, &write->wr_verifier);
}
static __be32
@ -4505,20 +4507,15 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,
server_scope = nn->nfsd_name;
server_scope_sz = strlen(nn->nfsd_name);
p = xdr_reserve_space(xdr,
8 /* eir_clientid */ +
4 /* eir_sequenceid */ +
4 /* eir_flags */ +
4 /* spr_how */);
if (!p)
if (nfsd4_encode_clientid4(xdr, &exid->clientid) != nfs_ok)
return nfserr_resource;
if (xdr_stream_encode_u32(xdr, exid->seqid) < 0)
return nfserr_resource;
if (xdr_stream_encode_u32(xdr, exid->flags) < 0)
return nfserr_resource;
p = xdr_encode_opaque_fixed(p, &exid->clientid, 8);
*p++ = cpu_to_be32(exid->seqid);
*p++ = cpu_to_be32(exid->flags);
*p++ = cpu_to_be32(exid->spa_how);
if (xdr_stream_encode_u32(xdr, exid->spa_how) < 0)
return nfserr_resource;
switch (exid->spa_how) {
case SP4_NONE:
break;
@ -5099,15 +5096,8 @@ nfsd4_encode_setxattr(struct nfsd4_compoundres *resp, __be32 nfserr,
{
struct nfsd4_setxattr *setxattr = &u->setxattr;
struct xdr_stream *xdr = resp->xdr;
__be32 *p;
p = xdr_reserve_space(xdr, 20);
if (!p)
return nfserr_resource;
encode_cinfo(p, &setxattr->setxa_cinfo);
return 0;
return nfsd4_encode_change_info4(xdr, &setxattr->setxa_cinfo);
}
/*
@ -5253,14 +5243,8 @@ nfsd4_encode_removexattr(struct nfsd4_compoundres *resp, __be32 nfserr,
{
struct nfsd4_removexattr *removexattr = &u->removexattr;
struct xdr_stream *xdr = resp->xdr;
__be32 *p;
p = xdr_reserve_space(xdr, 20);
if (!p)
return nfserr_resource;
p = encode_cinfo(p, &removexattr->rmxa_cinfo);
return 0;
return nfsd4_encode_change_info4(xdr, &removexattr->rmxa_cinfo);
}
typedef __be32(*nfsd4_enc)(struct nfsd4_compoundres *, __be32, union nfsd4_op_u *u);
@ -5460,6 +5444,12 @@ status:
release:
if (opdesc && opdesc->op_release)
opdesc->op_release(&op->u);
/*
* Account for pages consumed while encoding this operation.
* The xdr_stream primitives don't manage rq_next_page.
*/
rqstp->rq_next_page = xdr->page_ptr + 1;
}
/*
@ -5528,9 +5518,6 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
p = resp->statusp;
*p++ = resp->cstate.status;
rqstp->rq_next_page = xdr->page_ptr + 1;
*p++ = htonl(resp->taglen);
memcpy(p, resp->tag, resp->taglen);
p += XDR_QUADLEN(resp->taglen);

View File

@ -148,12 +148,23 @@ void nfsd_drc_slab_free(void)
kmem_cache_destroy(drc_slab);
}
static int nfsd_reply_cache_stats_init(struct nfsd_net *nn)
/**
* nfsd_net_reply_cache_init - per net namespace reply cache set-up
* @nn: nfsd_net being initialized
*
* Returns zero on succes; otherwise a negative errno is returned.
*/
int nfsd_net_reply_cache_init(struct nfsd_net *nn)
{
return nfsd_percpu_counters_init(nn->counter, NFSD_NET_COUNTERS_NUM);
}
static void nfsd_reply_cache_stats_destroy(struct nfsd_net *nn)
/**
* nfsd_net_reply_cache_destroy - per net namespace reply cache tear-down
* @nn: nfsd_net being freed
*
*/
void nfsd_net_reply_cache_destroy(struct nfsd_net *nn)
{
nfsd_percpu_counters_destroy(nn->counter, NFSD_NET_COUNTERS_NUM);
}
@ -169,17 +180,13 @@ int nfsd_reply_cache_init(struct nfsd_net *nn)
hashsize = nfsd_hashsize(nn->max_drc_entries);
nn->maskbits = ilog2(hashsize);
status = nfsd_reply_cache_stats_init(nn);
if (status)
goto out_nomem;
nn->nfsd_reply_cache_shrinker.scan_objects = nfsd_reply_cache_scan;
nn->nfsd_reply_cache_shrinker.count_objects = nfsd_reply_cache_count;
nn->nfsd_reply_cache_shrinker.seeks = 1;
status = register_shrinker(&nn->nfsd_reply_cache_shrinker,
"nfsd-reply:%s", nn->nfsd_name);
if (status)
goto out_stats_destroy;
return status;
nn->drc_hashtbl = kvzalloc(array_size(hashsize,
sizeof(*nn->drc_hashtbl)), GFP_KERNEL);
@ -195,9 +202,6 @@ int nfsd_reply_cache_init(struct nfsd_net *nn)
return 0;
out_shrinker:
unregister_shrinker(&nn->nfsd_reply_cache_shrinker);
out_stats_destroy:
nfsd_reply_cache_stats_destroy(nn);
out_nomem:
printk(KERN_ERR "nfsd: failed to allocate reply cache\n");
return -ENOMEM;
}
@ -217,7 +221,6 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn)
rp, nn);
}
}
nfsd_reply_cache_stats_destroy(nn);
kvfree(nn->drc_hashtbl);
nn->drc_hashtbl = NULL;

View File

@ -25,6 +25,7 @@
#include "netns.h"
#include "pnfs.h"
#include "filecache.h"
#include "trace.h"
/*
* We have a single directory with several nodes in it.
@ -110,11 +111,11 @@ static ssize_t nfsctl_transaction_write(struct file *file, const char __user *bu
return PTR_ERR(data);
rv = write_op[ino](file, data, size);
if (rv >= 0) {
simple_transaction_set(file, rv);
rv = size;
}
if (rv < 0)
return rv;
simple_transaction_set(file, rv);
return size;
}
static ssize_t nfsctl_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos)
@ -230,6 +231,7 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size)
if (rpc_pton(net, fo_path, size, sap, salen) == 0)
return -EINVAL;
trace_nfsd_ctl_unlock_ip(net, buf);
return nlmsvc_unlock_all_by_ip(sap);
}
@ -263,7 +265,7 @@ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size)
fo_path = buf;
if (qword_get(&buf, fo_path, size) < 0)
return -EINVAL;
trace_nfsd_ctl_unlock_fs(netns(file), fo_path);
error = kern_path(fo_path, 0, &path);
if (error)
return error;
@ -338,9 +340,11 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size)
return -EINVAL;
maxsize = min(maxsize, NFS3_FHSIZE);
if (qword_get(&mesg, mesg, size)>0)
if (qword_get(&mesg, mesg, size) > 0)
return -EINVAL;
trace_nfsd_ctl_filehandle(netns(file), dname, path, maxsize);
/* we have all the words, they are in buf.. */
dom = unix_domain_find(dname);
if (!dom)
@ -399,6 +403,7 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size)
return rv;
if (newthreads < 0)
return -EINVAL;
trace_nfsd_ctl_threads(net, newthreads);
rv = nfsd_svc(newthreads, net, file->f_cred);
if (rv < 0)
return rv;
@ -471,6 +476,7 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
rv = -EINVAL;
if (nthreads[i] < 0)
goto out_free;
trace_nfsd_ctl_pool_threads(net, i, nthreads[i]);
}
rv = nfsd_set_nrthreads(i, nthreads, net);
if (rv)
@ -526,7 +532,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
char *sep;
struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id);
if (size>0) {
if (size > 0) {
if (nn->nfsd_serv)
/* Cannot change versions without updating
* nn->nfsd_serv->sv_xdrsize, and reallocing
@ -536,6 +542,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
if (buf[size-1] != '\n')
return -EINVAL;
buf[size-1] = 0;
trace_nfsd_ctl_version(netns(file), buf);
vers = mesg;
len = qword_get(&mesg, vers, size);
@ -689,6 +696,7 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred
err = get_int(&mesg, &fd);
if (err != 0 || fd < 0)
return -EINVAL;
trace_nfsd_ctl_ports_addfd(net, fd);
err = nfsd_create_serv(net);
if (err != 0)
@ -705,7 +713,7 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred
}
/*
* A transport listener is added by writing it's transport name and
* A transport listener is added by writing its transport name and
* a port number.
*/
static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cred *cred)
@ -720,6 +728,7 @@ static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cr
if (port < 1 || port > USHRT_MAX)
return -EINVAL;
trace_nfsd_ctl_ports_addxprt(net, transport, port);
err = nfsd_create_serv(net);
if (err != 0)
@ -853,6 +862,8 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
int rv = get_int(&mesg, &bsize);
if (rv)
return rv;
trace_nfsd_ctl_maxblksize(netns(file), bsize);
/* force bsize into allowed range and
* required alignment.
*/
@ -903,6 +914,7 @@ static ssize_t write_maxconn(struct file *file, char *buf, size_t size)
if (rv)
return rv;
trace_nfsd_ctl_maxconn(netns(file), maxconn);
nn->max_connections = maxconn;
}
@ -913,6 +925,7 @@ static ssize_t write_maxconn(struct file *file, char *buf, size_t size)
static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size,
time64_t *time, struct nfsd_net *nn)
{
struct dentry *dentry = file_dentry(file);
char *mesg = buf;
int rv, i;
@ -922,6 +935,9 @@ static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size,
rv = get_int(&mesg, &i);
if (rv)
return rv;
trace_nfsd_ctl_time(netns(file), dentry->d_name.name,
dentry->d_name.len, i);
/*
* Some sanity checking. We don't have a reason for
* these particular numbers, but problems with the
@ -1014,6 +1030,7 @@ static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size,
len = qword_get(&mesg, recdir, size);
if (len <= 0)
return -EINVAL;
trace_nfsd_ctl_recoverydir(netns(file), recdir);
status = nfs4_reset_recoverydir(recdir);
if (status)
@ -1087,7 +1104,7 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size)
case '1':
if (!nn->nfsd_serv)
return -EBUSY;
nfsd4_end_grace(nn);
trace_nfsd_end_grace(netns(file));
break;
default:
return -EINVAL;
@ -1192,7 +1209,7 @@ static int __nfsd_symlink(struct inode *dir, struct dentry *dentry,
* @content is assumed to be a NUL-terminated string that lives
* longer than the symlink itself.
*/
static void nfsd_symlink(struct dentry *parent, const char *name,
static void _nfsd_symlink(struct dentry *parent, const char *name,
const char *content)
{
struct inode *dir = parent->d_inode;
@ -1210,7 +1227,7 @@ out:
inode_unlock(dir);
}
#else
static inline void nfsd_symlink(struct dentry *parent, const char *name,
static inline void _nfsd_symlink(struct dentry *parent, const char *name,
const char *content)
{
}
@ -1389,7 +1406,7 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc)
ret = simple_fill_super(sb, 0x6e667364, nfsd_files);
if (ret)
return ret;
nfsd_symlink(sb->s_root, "supported_krb5_enctypes",
_nfsd_symlink(sb->s_root, "supported_krb5_enctypes",
"/proc/net/rpc/gss_krb5_enctypes");
dentry = nfsd_mkdir(sb->s_root, NULL, "clients");
if (IS_ERR(dentry))
@ -1477,7 +1494,17 @@ static int create_proc_exports_entry(void)
unsigned int nfsd_net_id;
static __net_init int nfsd_init_net(struct net *net)
/**
* nfsd_net_init - Prepare the nfsd_net portion of a new net namespace
* @net: a freshly-created network namespace
*
* This information stays around as long as the network namespace is
* alive whether or not there is an NFSD instance running in the
* namespace.
*
* Returns zero on success, or a negative errno otherwise.
*/
static __net_init int nfsd_net_init(struct net *net)
{
int retval;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
@ -1488,6 +1515,9 @@ static __net_init int nfsd_init_net(struct net *net)
retval = nfsd_idmap_init(net);
if (retval)
goto out_idmap_error;
retval = nfsd_net_reply_cache_init(nn);
if (retval)
goto out_repcache_error;
nn->nfsd_versions = NULL;
nn->nfsd4_minorversions = NULL;
nfsd4_init_leases_net(nn);
@ -1496,22 +1526,32 @@ static __net_init int nfsd_init_net(struct net *net)
return 0;
out_repcache_error:
nfsd_idmap_shutdown(net);
out_idmap_error:
nfsd_export_shutdown(net);
out_export_error:
return retval;
}
static __net_exit void nfsd_exit_net(struct net *net)
/**
* nfsd_net_exit - Release the nfsd_net portion of a net namespace
* @net: a network namespace that is about to be destroyed
*
*/
static __net_exit void nfsd_net_exit(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
nfsd_net_reply_cache_destroy(nn);
nfsd_idmap_shutdown(net);
nfsd_export_shutdown(net);
nfsd_netns_free_versions(net_generic(net, nfsd_net_id));
nfsd_netns_free_versions(nn);
}
static struct pernet_operations nfsd_net_ops = {
.init = nfsd_init_net,
.exit = nfsd_exit_net,
.init = nfsd_net_init,
.exit = nfsd_net_exit,
.id = &nfsd_net_id,
.size = sizeof(struct nfsd_net),
};

View File

@ -623,16 +623,9 @@ void fh_fill_pre_attrs(struct svc_fh *fhp)
inode = d_inode(fhp->fh_dentry);
err = fh_getattr(fhp, &stat);
if (err) {
/* Grab the times from inode anyway */
stat.mtime = inode->i_mtime;
stat.ctime = inode->i_ctime;
stat.size = inode->i_size;
if (v4 && IS_I_VERSION(inode)) {
stat.change_cookie = inode_query_iversion(inode);
stat.result_mask |= STATX_CHANGE_COOKIE;
}
}
if (err)
return;
if (v4)
fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode);
@ -660,14 +653,9 @@ void fh_fill_post_attrs(struct svc_fh *fhp)
printk("nfsd: inode locked twice during operation.\n");
err = fh_getattr(fhp, &fhp->fh_post_attr);
if (err) {
fhp->fh_post_saved = false;
fhp->fh_post_attr.ctime = inode->i_ctime;
if (v4 && IS_I_VERSION(inode)) {
fhp->fh_post_attr.change_cookie = inode_query_iversion(inode);
fhp->fh_post_attr.result_mask |= STATX_CHANGE_COOKIE;
}
} else
if (err)
return;
fhp->fh_post_saved = true;
if (v4)
fhp->fh_post_change =

View File

@ -176,9 +176,7 @@ nfsd_proc_read(struct svc_rqst *rqstp)
{
struct nfsd_readargs *argp = rqstp->rq_argp;
struct nfsd_readres *resp = rqstp->rq_resp;
unsigned int len;
u32 eof;
int v;
dprintk("nfsd: READ %s %d bytes at %d\n",
SVCFH_fmt(&argp->fh),
@ -187,17 +185,7 @@ nfsd_proc_read(struct svc_rqst *rqstp)
argp->count = min_t(u32, argp->count, NFSSVC_MAXBLKSIZE_V2);
argp->count = min_t(u32, argp->count, rqstp->rq_res.buflen);
v = 0;
len = argp->count;
resp->pages = rqstp->rq_next_page;
while (len > 0) {
struct page *page = *(rqstp->rq_next_page++);
rqstp->rq_vec[v].iov_base = page_address(page);
rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE);
len -= rqstp->rq_vec[v].iov_len;
v++;
}
/* Obtain buffer pointer for payload. 19 is 1 word for
* status, 17 words for fattr, and 1 word for the byte count.
@ -207,7 +195,7 @@ nfsd_proc_read(struct svc_rqst *rqstp)
resp->count = argp->count;
fh_copy(&resp->fh, &argp->fh);
resp->status = nfsd_read(rqstp, &resp->fh, argp->offset,
rqstp->rq_vec, v, &resp->count, &eof);
&resp->count, &eof);
if (resp->status == nfs_ok)
resp->status = fh_getattr(&resp->fh, &resp->stat);
else if (resp->status == nfserr_jukebox)

View File

@ -402,6 +402,11 @@ void nfsd_reset_write_verifier(struct nfsd_net *nn)
write_sequnlock(&nn->writeverf_lock);
}
/*
* Crank up a set of per-namespace resources for a new NFSD instance,
* including lockd, a duplicate reply cache, an open file cache
* instance, and a cache of NFSv4 state objects.
*/
static int nfsd_startup_net(struct net *net, const struct cred *cred)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);

View File

@ -468,7 +468,8 @@ nfssvc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
case nfs_ok:
if (xdr_stream_encode_u32(xdr, resp->len) < 0)
return false;
xdr_write_pages(xdr, &resp->page, 0, resp->len);
svcxdr_encode_opaque_pages(rqstp, xdr, &resp->page, 0,
resp->len);
if (svc_encode_result_payload(rqstp, head->iov_len, resp->len) < 0)
return false;
break;
@ -491,7 +492,8 @@ nfssvc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
return false;
if (xdr_stream_encode_u32(xdr, resp->count) < 0)
return false;
xdr_write_pages(xdr, resp->pages, rqstp->rq_res.page_base,
svcxdr_encode_opaque_pages(rqstp, xdr, resp->pages,
rqstp->rq_res.page_base,
resp->count);
if (svc_encode_result_payload(rqstp, head->iov_len, resp->count) < 0)
return false;
@ -511,7 +513,8 @@ nfssvc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
return false;
switch (resp->status) {
case nfs_ok:
xdr_write_pages(xdr, dirlist->pages, 0, dirlist->len);
svcxdr_encode_opaque_pages(rqstp, xdr, dirlist->pages, 0,
dirlist->len);
/* no more entries */
if (xdr_stream_encode_item_absent(xdr) < 0)
return false;

View File

@ -1581,6 +1581,265 @@ TRACE_EVENT(nfsd_cb_recall_any_done,
)
);
TRACE_EVENT(nfsd_ctl_unlock_ip,
TP_PROTO(
const struct net *net,
const char *address
),
TP_ARGS(net, address),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__string(address, address)
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__assign_str(address, address);
),
TP_printk("address=%s",
__get_str(address)
)
);
TRACE_EVENT(nfsd_ctl_unlock_fs,
TP_PROTO(
const struct net *net,
const char *path
),
TP_ARGS(net, path),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__string(path, path)
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__assign_str(path, path);
),
TP_printk("path=%s",
__get_str(path)
)
);
TRACE_EVENT(nfsd_ctl_filehandle,
TP_PROTO(
const struct net *net,
const char *domain,
const char *path,
int maxsize
),
TP_ARGS(net, domain, path, maxsize),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__field(int, maxsize)
__string(domain, domain)
__string(path, path)
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__entry->maxsize = maxsize;
__assign_str(domain, domain);
__assign_str(path, path);
),
TP_printk("domain=%s path=%s maxsize=%d",
__get_str(domain), __get_str(path), __entry->maxsize
)
);
TRACE_EVENT(nfsd_ctl_threads,
TP_PROTO(
const struct net *net,
int newthreads
),
TP_ARGS(net, newthreads),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__field(int, newthreads)
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__entry->newthreads = newthreads;
),
TP_printk("newthreads=%d",
__entry->newthreads
)
);
TRACE_EVENT(nfsd_ctl_pool_threads,
TP_PROTO(
const struct net *net,
int pool,
int nrthreads
),
TP_ARGS(net, pool, nrthreads),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__field(int, pool)
__field(int, nrthreads)
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__entry->pool = pool;
__entry->nrthreads = nrthreads;
),
TP_printk("pool=%d nrthreads=%d",
__entry->pool, __entry->nrthreads
)
);
TRACE_EVENT(nfsd_ctl_version,
TP_PROTO(
const struct net *net,
const char *mesg
),
TP_ARGS(net, mesg),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__string(mesg, mesg)
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__assign_str(mesg, mesg);
),
TP_printk("%s",
__get_str(mesg)
)
);
TRACE_EVENT(nfsd_ctl_ports_addfd,
TP_PROTO(
const struct net *net,
int fd
),
TP_ARGS(net, fd),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__field(int, fd)
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__entry->fd = fd;
),
TP_printk("fd=%d",
__entry->fd
)
);
TRACE_EVENT(nfsd_ctl_ports_addxprt,
TP_PROTO(
const struct net *net,
const char *transport,
int port
),
TP_ARGS(net, transport, port),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__field(int, port)
__string(transport, transport)
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__entry->port = port;
__assign_str(transport, transport);
),
TP_printk("transport=%s port=%d",
__get_str(transport), __entry->port
)
);
TRACE_EVENT(nfsd_ctl_maxblksize,
TP_PROTO(
const struct net *net,
int bsize
),
TP_ARGS(net, bsize),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__field(int, bsize)
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__entry->bsize = bsize;
),
TP_printk("bsize=%d",
__entry->bsize
)
);
TRACE_EVENT(nfsd_ctl_maxconn,
TP_PROTO(
const struct net *net,
int maxconn
),
TP_ARGS(net, maxconn),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__field(int, maxconn)
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__entry->maxconn = maxconn;
),
TP_printk("maxconn=%d",
__entry->maxconn
)
);
TRACE_EVENT(nfsd_ctl_time,
TP_PROTO(
const struct net *net,
const char *name,
size_t namelen,
int time
),
TP_ARGS(net, name, namelen, time),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__field(int, time)
__string_len(name, name, namelen)
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__entry->time = time;
__assign_str_len(name, name, namelen);
),
TP_printk("file=%s time=%d\n",
__get_str(name), __entry->time
)
);
TRACE_EVENT(nfsd_ctl_recoverydir,
TP_PROTO(
const struct net *net,
const char *recdir
),
TP_ARGS(net, recdir),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__string(recdir, recdir)
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__assign_str(recdir, recdir);
),
TP_printk("recdir=%s",
__get_str(recdir)
)
);
TRACE_EVENT(nfsd_end_grace,
TP_PROTO(
const struct net *net
),
TP_ARGS(net),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
),
TP_printk("nn=%d", __entry->netns_ino
)
);
#endif /* _NFSD_TRACE_H */
#undef TRACE_INCLUDE_PATH

View File

@ -388,7 +388,9 @@ nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap)
iap->ia_mode &= ~S_ISGID;
} else {
/* set ATTR_KILL_* bits and let VFS handle it */
iap->ia_valid |= (ATTR_KILL_SUID | ATTR_KILL_SGID);
iap->ia_valid |= ATTR_KILL_SUID;
iap->ia_valid |=
setattr_should_drop_sgid(&nop_mnt_idmap, inode);
}
}
}
@ -1001,6 +1003,18 @@ static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
}
/**
* nfsd_splice_read - Perform a VFS read using a splice pipe
* @rqstp: RPC transaction context
* @fhp: file handle of file to be read
* @file: opened struct file of file to be read
* @offset: starting byte offset
* @count: IN: requested number of bytes; OUT: number of bytes read
* @eof: OUT: set non-zero if operation reached the end of the file
*
* Returns nfs_ok on success, otherwise an nfserr stat value is
* returned.
*/
__be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset, unsigned long *count,
u32 *eof)
@ -1014,22 +1028,50 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
ssize_t host_err;
trace_nfsd_read_splice(rqstp, fhp, offset, *count);
rqstp->rq_next_page = rqstp->rq_respages + 1;
host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
}
__be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset,
struct kvec *vec, int vlen, unsigned long *count,
u32 *eof)
/**
* nfsd_iter_read - Perform a VFS read using an iterator
* @rqstp: RPC transaction context
* @fhp: file handle of file to be read
* @file: opened struct file of file to be read
* @offset: starting byte offset
* @count: IN: requested number of bytes; OUT: number of bytes read
* @base: offset in first page of read buffer
* @eof: OUT: set non-zero if operation reached the end of the file
*
* Some filesystems or situations cannot use nfsd_splice_read. This
* function is the slightly less-performant fallback for those cases.
*
* Returns nfs_ok on success, otherwise an nfserr stat value is
* returned.
*/
__be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset, unsigned long *count,
unsigned int base, u32 *eof)
{
unsigned long v, total;
struct iov_iter iter;
loff_t ppos = offset;
struct page *page;
ssize_t host_err;
v = 0;
total = *count;
while (total) {
page = *(rqstp->rq_next_page++);
rqstp->rq_vec[v].iov_base = page_address(page) + base;
rqstp->rq_vec[v].iov_len = min_t(size_t, total, PAGE_SIZE - base);
total -= rqstp->rq_vec[v].iov_len;
++v;
base = 0;
}
WARN_ON_ONCE(v > ARRAY_SIZE(rqstp->rq_vec));
trace_nfsd_read_vector(rqstp, fhp, offset, *count);
iov_iter_kvec(&iter, ITER_DEST, vec, vlen, *count);
iov_iter_kvec(&iter, ITER_DEST, rqstp->rq_vec, v, *count);
host_err = vfs_iter_read(file, &iter, &ppos, 0);
return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
}
@ -1159,14 +1201,24 @@ out_nfserr:
return nfserr;
}
/*
* Read data from a file. count must contain the requested read count
* on entry. On return, *count contains the number of bytes actually read.
/**
* nfsd_read - Read data from a file
* @rqstp: RPC transaction context
* @fhp: file handle of file to be read
* @offset: starting byte offset
* @count: IN: requested number of bytes; OUT: number of bytes read
* @eof: OUT: set non-zero if operation reached the end of the file
*
* The caller must verify that there is enough space in @rqstp.rq_res
* to perform this operation.
*
* N.B. After this call fhp needs an fh_put
*
* Returns nfs_ok on success, otherwise an nfserr stat value is
* returned.
*/
__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
loff_t offset, struct kvec *vec, int vlen, unsigned long *count,
u32 *eof)
loff_t offset, unsigned long *count, u32 *eof)
{
struct nfsd_file *nf;
struct file *file;
@ -1181,12 +1233,10 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &rqstp->rq_flags))
err = nfsd_splice_read(rqstp, fhp, file, offset, count, eof);
else
err = nfsd_readv(rqstp, fhp, file, offset, vec, vlen, count, eof);
err = nfsd_iter_read(rqstp, fhp, file, offset, count, 0, eof);
nfsd_file_put(nf);
trace_nfsd_read_done(rqstp, fhp, offset, *count);
return err;
}

View File

@ -110,13 +110,12 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset,
unsigned long *count,
u32 *eof);
__be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
__be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset,
struct kvec *vec, int vlen,
unsigned long *count,
unsigned long *count, unsigned int base,
u32 *eof);
__be32 nfsd_read(struct svc_rqst *, struct svc_fh *,
loff_t, struct kvec *, int, unsigned long *,
__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
loff_t offset, unsigned long *count,
u32 *eof);
__be32 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t,
struct kvec *, int, unsigned long *,

View File

@ -508,6 +508,27 @@ static inline void svcxdr_init_encode(struct svc_rqst *rqstp)
xdr->rqst = NULL;
}
/**
* svcxdr_encode_opaque_pages - Insert pages into an xdr_stream
* @xdr: xdr_stream to be updated
* @pages: array of pages to insert
* @base: starting offset of first data byte in @pages
* @len: number of data bytes in @pages to insert
*
* After the @pages are added, the tail iovec is instantiated pointing
* to end of the head buffer, and the stream is set up to encode
* subsequent items into the tail.
*/
static inline void svcxdr_encode_opaque_pages(struct svc_rqst *rqstp,
struct xdr_stream *xdr,
struct page **pages,
unsigned int base,
unsigned int len)
{
xdr_write_pages(xdr, pages, base, len);
xdr->page_ptr = rqstp->rq_next_page - 1;
}
/**
* svcxdr_set_auth_slack -
* @rqstp: RPC transaction

View File

@ -135,7 +135,6 @@ struct svc_rdma_recv_ctxt {
struct ib_sge rc_recv_sge;
void *rc_recv_buf;
struct xdr_stream rc_stream;
bool rc_temp;
u32 rc_byte_len;
unsigned int rc_page_count;
u32 rc_inv_rkey;
@ -155,12 +154,12 @@ struct svc_rdma_send_ctxt {
struct ib_send_wr sc_send_wr;
struct ib_cqe sc_cqe;
struct completion sc_done;
struct xdr_buf sc_hdrbuf;
struct xdr_stream sc_stream;
void *sc_xprt_buf;
int sc_page_count;
int sc_cur_sge_no;
struct page *sc_pages[RPCSVC_MAXPAGES];
struct ib_sge sc_sges[];
};

View File

@ -242,8 +242,7 @@ extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf,
extern void xdr_init_encode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
struct page **pages, struct rpc_rqst *rqst);
extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
extern int xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec,
size_t nbytes);
extern int xdr_reserve_space_vec(struct xdr_stream *xdr, size_t nbytes);
extern void __xdr_commit_encode(struct xdr_stream *xdr);
extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len);
extern void xdr_truncate_decode(struct xdr_stream *xdr, size_t len);

View File

@ -2112,6 +2112,14 @@ DEFINE_POST_CHUNK_EVENT(read);
DEFINE_POST_CHUNK_EVENT(write);
DEFINE_POST_CHUNK_EVENT(reply);
DEFINE_EVENT(svcrdma_post_chunk_class, svcrdma_cc_release,
TP_PROTO(
const struct rpc_rdma_cid *cid,
int sqecount
),
TP_ARGS(cid, sqecount)
);
TRACE_EVENT(svcrdma_wc_read,
TP_PROTO(
const struct ib_wc *wc,

View File

@ -2104,31 +2104,46 @@ DEFINE_SVC_DEFERRED_EVENT(drop);
DEFINE_SVC_DEFERRED_EVENT(queue);
DEFINE_SVC_DEFERRED_EVENT(recv);
TRACE_EVENT(svcsock_new_socket,
DECLARE_EVENT_CLASS(svcsock_lifetime_class,
TP_PROTO(
const void *svsk,
const struct socket *socket
),
TP_ARGS(socket),
TP_ARGS(svsk, socket),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__field(const void *, svsk)
__field(const void *, sk)
__field(unsigned long, type)
__field(unsigned long, family)
__field(bool, listener)
__field(unsigned long, state)
),
TP_fast_assign(
__entry->type = socket->type;
__entry->family = socket->sk->sk_family;
__entry->listener = (socket->sk->sk_state == TCP_LISTEN);
),
struct sock *sk = socket->sk;
TP_printk("type=%s family=%s%s",
show_socket_type(__entry->type),
__entry->netns_ino = sock_net(sk)->ns.inum;
__entry->svsk = svsk;
__entry->sk = sk;
__entry->type = socket->type;
__entry->family = sk->sk_family;
__entry->state = sk->sk_state;
),
TP_printk("svsk=%p type=%s family=%s%s",
__entry->svsk, show_socket_type(__entry->type),
rpc_show_address_family(__entry->family),
__entry->listener ? " (listener)" : ""
__entry->state == TCP_LISTEN ? " (listener)" : ""
)
);
#define DEFINE_SVCSOCK_LIFETIME_EVENT(name) \
DEFINE_EVENT(svcsock_lifetime_class, name, \
TP_PROTO( \
const void *svsk, \
const struct socket *socket \
), \
TP_ARGS(svsk, socket))
DEFINE_SVCSOCK_LIFETIME_EVENT(svcsock_new);
DEFINE_SVCSOCK_LIFETIME_EVENT(svcsock_free);
TRACE_EVENT(svcsock_marker,
TP_PROTO(

View File

@ -109,15 +109,15 @@ param_get_pool_mode(char *buf, const struct kernel_param *kp)
switch (*ip)
{
case SVC_POOL_AUTO:
return strlcpy(buf, "auto\n", 20);
return sysfs_emit(buf, "auto\n");
case SVC_POOL_GLOBAL:
return strlcpy(buf, "global\n", 20);
return sysfs_emit(buf, "global\n");
case SVC_POOL_PERCPU:
return strlcpy(buf, "percpu\n", 20);
return sysfs_emit(buf, "percpu\n");
case SVC_POOL_PERNODE:
return strlcpy(buf, "pernode\n", 20);
return sysfs_emit(buf, "pernode\n");
default:
return sprintf(buf, "%d\n", *ip);
return sysfs_emit(buf, "%d\n", *ip);
}
}
@ -597,34 +597,25 @@ svc_destroy(struct kref *ref)
}
EXPORT_SYMBOL_GPL(svc_destroy);
/*
* Allocate an RPC server's buffer space.
* We allocate pages and place them in rq_pages.
*/
static int
static bool
svc_init_buffer(struct svc_rqst *rqstp, unsigned int size, int node)
{
unsigned int pages, arghi;
unsigned long pages, ret;
/* bc_xprt uses fore channel allocated buffers */
if (svc_is_backchannel(rqstp))
return 1;
return true;
pages = size / PAGE_SIZE + 1; /* extra page as we hold both request and reply.
* We assume one is at most one page
*/
arghi = 0;
WARN_ON_ONCE(pages > RPCSVC_MAXPAGES);
if (pages > RPCSVC_MAXPAGES)
pages = RPCSVC_MAXPAGES;
while (pages) {
struct page *p = alloc_pages_node(node, GFP_KERNEL, 0);
if (!p)
break;
rqstp->rq_pages[arghi++] = p;
pages--;
}
return pages == 0;
ret = alloc_pages_bulk_array_node(GFP_KERNEL, node, pages,
rqstp->rq_pages);
return ret == pages;
}
/*
@ -1173,6 +1164,7 @@ static void __svc_unregister(struct net *net, const u32 program, const u32 versi
*/
static void svc_unregister(const struct svc_serv *serv, struct net *net)
{
struct sighand_struct *sighand;
struct svc_program *progp;
unsigned long flags;
unsigned int i;
@ -1189,9 +1181,12 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net)
}
}
spin_lock_irqsave(&current->sighand->siglock, flags);
rcu_read_lock();
sighand = rcu_dereference(current->sighand);
spin_lock_irqsave(&sighand->siglock, flags);
recalc_sigpending();
spin_unlock_irqrestore(&current->sighand->siglock, flags);
spin_unlock_irqrestore(&sighand->siglock, flags);
rcu_read_unlock();
}
/*

View File

@ -74,13 +74,18 @@ static LIST_HEAD(svc_xprt_class_list);
* that no other thread will be using the transport or will
* try to set XPT_DEAD.
*/
/**
* svc_reg_xprt_class - Register a server-side RPC transport class
* @xcl: New transport class to be registered
*
* Returns zero on success; otherwise a negative errno is returned.
*/
int svc_reg_xprt_class(struct svc_xprt_class *xcl)
{
struct svc_xprt_class *cl;
int res = -EEXIST;
dprintk("svc: Adding svc transport class '%s'\n", xcl->xcl_name);
INIT_LIST_HEAD(&xcl->xcl_list);
spin_lock(&svc_xprt_class_lock);
/* Make sure there isn't already a class with the same name */
@ -96,9 +101,13 @@ out:
}
EXPORT_SYMBOL_GPL(svc_reg_xprt_class);
/**
* svc_unreg_xprt_class - Unregister a server-side RPC transport class
* @xcl: Transport class to be unregistered
*
*/
void svc_unreg_xprt_class(struct svc_xprt_class *xcl)
{
dprintk("svc: Removing svc transport class '%s'\n", xcl->xcl_name);
spin_lock(&svc_xprt_class_lock);
list_del_init(&xcl->xcl_list);
spin_unlock(&svc_xprt_class_lock);
@ -685,8 +694,9 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
}
for (filled = 0; filled < pages; filled = ret) {
ret = alloc_pages_bulk_array(GFP_KERNEL, pages,
rqstp->rq_pages);
ret = alloc_pages_bulk_array_node(GFP_KERNEL,
rqstp->rq_pool->sp_id,
pages, rqstp->rq_pages);
if (ret > filled)
/* Made progress, don't sleep yet */
continue;
@ -843,15 +853,11 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
svc_xprt_received(xprt);
} else if (svc_xprt_reserve_slot(rqstp, xprt)) {
/* XPT_DATA|XPT_DEFERRED case: */
dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
rqstp, rqstp->rq_pool->sp_id, xprt,
kref_read(&xprt->xpt_ref));
rqstp->rq_deferred = svc_deferred_dequeue(xprt);
if (rqstp->rq_deferred)
len = svc_deferred_recv(rqstp);
else
len = xprt->xpt_ops->xpo_recvfrom(rqstp);
rqstp->rq_stime = ktime_get();
rqstp->rq_reserved = serv->sv_max_mesg;
atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
} else
@ -894,6 +900,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
err = -EAGAIN;
if (len <= 0)
goto out_release;
trace_svc_xdr_recvfrom(&rqstp->rq_arg);
clear_bit(XPT_OLD, &xprt->xpt_flags);
@ -902,6 +909,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
if (serv->sv_stats)
serv->sv_stats->netcnt++;
rqstp->rq_stime = ktime_get();
return len;
out_release:
rqstp->rq_res.len = 0;

View File

@ -826,12 +826,6 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
trace_sk_data_ready(sk);
if (svsk) {
/* Refer to svc_setup_socket() for details. */
rmb();
svsk->sk_odata(sk);
}
/*
* This callback may called twice when a new connection
* is established as a child socket inherits everything
@ -840,14 +834,19 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
* when one of child sockets become ESTABLISHED.
* 2) data_ready method of the child socket may be called
* when it receives data before the socket is accepted.
* In case of 2, we should ignore it silently.
* In case of 2, we should ignore it silently and DO NOT
* dereference svsk.
*/
if (sk->sk_state == TCP_LISTEN) {
if (sk->sk_state != TCP_LISTEN)
return;
if (svsk) {
/* Refer to svc_setup_socket() for details. */
rmb();
svsk->sk_odata(sk);
set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
svc_xprt_enqueue(&svsk->sk_xprt);
}
}
}
/*
@ -887,12 +886,7 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
clear_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
err = kernel_accept(sock, &newsock, O_NONBLOCK);
if (err < 0) {
if (err == -ENOMEM)
printk(KERN_WARNING "%s: no more sockets!\n",
serv->sv_name);
else if (err != -EAGAIN)
net_warn_ratelimited("%s: accept failed (err %d)!\n",
serv->sv_name, -err);
if (err != -EAGAIN)
trace_svcsock_accept_err(xprt, serv->sv_name, err);
return NULL;
}
@ -1464,7 +1458,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
svsk->sk_owspace = inet->sk_write_space;
/*
* This barrier is necessary in order to prevent race condition
* with svc_data_ready(), svc_listen_data_ready() and others
* with svc_data_ready(), svc_tcp_listen_data_ready(), and others
* when calling callbacks above.
*/
wmb();
@ -1476,7 +1470,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
else
svc_tcp_init(svsk, serv);
trace_svcsock_new_socket(sock);
trace_svcsock_new(svsk, sock);
return svsk;
}
@ -1657,6 +1651,8 @@ static void svc_sock_free(struct svc_xprt *xprt)
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
struct socket *sock = svsk->sk_sock;
trace_svcsock_free(svsk, sock);
tls_handshake_cancel(sock->sk);
if (sock->file)
sockfd_put(sock);

View File

@ -1070,22 +1070,22 @@ __be32 * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes)
}
EXPORT_SYMBOL_GPL(xdr_reserve_space);
/**
* xdr_reserve_space_vec - Reserves a large amount of buffer space for sending
* @xdr: pointer to xdr_stream
* @vec: pointer to a kvec array
* @nbytes: number of bytes to reserve
*
* Reserves enough buffer space to encode 'nbytes' of data and stores the
* pointers in 'vec'. The size argument passed to xdr_reserve_space() is
* determined based on the number of bytes remaining in the current page to
* avoid invalidating iov_base pointers when xdr_commit_encode() is called.
* The size argument passed to xdr_reserve_space() is determined based
* on the number of bytes remaining in the current page to avoid
* invalidating iov_base pointers when xdr_commit_encode() is called.
*
* Return values:
* %0: success
* %-EMSGSIZE: not enough space is available in @xdr
*/
int xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec, size_t nbytes)
int xdr_reserve_space_vec(struct xdr_stream *xdr, size_t nbytes)
{
int thislen;
int v = 0;
size_t thislen;
__be32 *p;
/*
@ -1097,21 +1097,19 @@ int xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec, size_t nbyte
xdr->end = xdr->p;
}
/* XXX: Let's find a way to make this more efficient */
while (nbytes) {
thislen = xdr->buf->page_len % PAGE_SIZE;
thislen = min_t(size_t, nbytes, PAGE_SIZE - thislen);
p = xdr_reserve_space(xdr, thislen);
if (!p)
return -EIO;
return -EMSGSIZE;
vec[v].iov_base = p;
vec[v].iov_len = thislen;
v++;
nbytes -= thislen;
}
return v;
return 0;
}
EXPORT_SYMBOL_GPL(xdr_reserve_space_vec);

View File

@ -93,13 +93,7 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
*/
get_page(virt_to_page(rqst->rq_buffer));
sctxt->sc_send_wr.opcode = IB_WR_SEND;
ret = svc_rdma_send(rdma, sctxt);
if (ret < 0)
return ret;
ret = wait_for_completion_killable(&sctxt->sc_done);
svc_rdma_send_ctxt_put(rdma, sctxt);
return ret;
return svc_rdma_send(rdma, sctxt);
}
/* Server-side transport endpoint wants a whole page for its send

View File

@ -125,14 +125,15 @@ static void svc_rdma_recv_cid_init(struct svcxprt_rdma *rdma,
static struct svc_rdma_recv_ctxt *
svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
{
int node = ibdev_to_node(rdma->sc_cm_id->device);
struct svc_rdma_recv_ctxt *ctxt;
dma_addr_t addr;
void *buffer;
ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL);
ctxt = kmalloc_node(sizeof(*ctxt), GFP_KERNEL, node);
if (!ctxt)
goto fail0;
buffer = kmalloc(rdma->sc_max_req_size, GFP_KERNEL);
buffer = kmalloc_node(rdma->sc_max_req_size, GFP_KERNEL, node);
if (!buffer)
goto fail1;
addr = ib_dma_map_single(rdma->sc_pd->device, buffer,
@ -155,7 +156,6 @@ svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
ctxt->rc_recv_sge.length = rdma->sc_max_req_size;
ctxt->rc_recv_sge.lkey = rdma->sc_pd->local_dma_lkey;
ctxt->rc_recv_buf = buffer;
ctxt->rc_temp = false;
return ctxt;
fail2:
@ -232,10 +232,7 @@ void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
pcl_free(&ctxt->rc_write_pcl);
pcl_free(&ctxt->rc_reply_pcl);
if (!ctxt->rc_temp)
llist_add(&ctxt->rc_node, &rdma->sc_recv_ctxts);
else
svc_rdma_recv_ctxt_destroy(rdma, ctxt);
}
/**
@ -258,7 +255,7 @@ void svc_rdma_release_ctxt(struct svc_xprt *xprt, void *vctxt)
}
static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma,
unsigned int wanted, bool temp)
unsigned int wanted)
{
const struct ib_recv_wr *bad_wr = NULL;
struct svc_rdma_recv_ctxt *ctxt;
@ -275,7 +272,6 @@ static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma,
break;
trace_svcrdma_post_recv(ctxt);
ctxt->rc_temp = temp;
ctxt->rc_recv_wr.next = recv_chain;
recv_chain = &ctxt->rc_recv_wr;
rdma->sc_pending_recvs++;
@ -309,7 +305,7 @@ err_free:
*/
bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma)
{
return svc_rdma_refresh_recvs(rdma, rdma->sc_max_requests, true);
return svc_rdma_refresh_recvs(rdma, rdma->sc_max_requests);
}
/**
@ -343,7 +339,7 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
* client reconnects.
*/
if (rdma->sc_pending_recvs < rdma->sc_max_requests)
if (!svc_rdma_refresh_recvs(rdma, rdma->sc_recv_batch, false))
if (!svc_rdma_refresh_recvs(rdma, rdma->sc_recv_batch))
goto dropped;
/* All wc fields are now known to be valid */
@ -775,9 +771,6 @@ static bool svc_rdma_is_reverse_direction_reply(struct svc_xprt *xprt,
*
* The next ctxt is removed from the "receive" lists.
*
* - If the ctxt completes a Read, then finish assembling the Call
* message and return the number of bytes in the message.
*
* - If the ctxt completes a Receive, then construct the Call
* message from the contents of the Receive buffer.
*
@ -786,7 +779,8 @@ static bool svc_rdma_is_reverse_direction_reply(struct svc_xprt *xprt,
* in the message.
*
* - If there are Read chunks in this message, post Read WRs to
* pull that payload and return 0.
* pull that payload. When the Read WRs complete, build the
* full message and return the number of bytes in it.
*/
int svc_rdma_recvfrom(struct svc_rqst *rqstp)
{
@ -796,6 +790,12 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
struct svc_rdma_recv_ctxt *ctxt;
int ret;
/* Prevent svc_xprt_release() from releasing pages in rq_pages
* when returning 0 or an error.
*/
rqstp->rq_respages = rqstp->rq_pages;
rqstp->rq_next_page = rqstp->rq_respages;
rqstp->rq_xprt_ctxt = NULL;
ctxt = NULL;
@ -819,12 +819,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
DMA_FROM_DEVICE);
svc_rdma_build_arg_xdr(rqstp, ctxt);
/* Prevent svc_xprt_release from releasing pages in rq_pages
* if we return 0 or an error.
*/
rqstp->rq_respages = rqstp->rq_pages;
rqstp->rq_next_page = rqstp->rq_respages;
ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg, ctxt);
if (ret < 0)
goto out_err;

View File

@ -62,8 +62,8 @@ svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges)
if (node) {
ctxt = llist_entry(node, struct svc_rdma_rw_ctxt, rw_node);
} else {
ctxt = kmalloc(struct_size(ctxt, rw_first_sgl, SG_CHUNK_SIZE),
GFP_KERNEL);
ctxt = kmalloc_node(struct_size(ctxt, rw_first_sgl, SG_CHUNK_SIZE),
GFP_KERNEL, ibdev_to_node(rdma->sc_cm_id->device));
if (!ctxt)
goto out_noctx;
@ -84,8 +84,7 @@ out_noctx:
return NULL;
}
static void __svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
struct svc_rdma_rw_ctxt *ctxt,
static void __svc_rdma_put_rw_ctxt(struct svc_rdma_rw_ctxt *ctxt,
struct llist_head *list)
{
sg_free_table_chained(&ctxt->rw_sg_table, SG_CHUNK_SIZE);
@ -95,7 +94,7 @@ static void __svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
static void svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
struct svc_rdma_rw_ctxt *ctxt)
{
__svc_rdma_put_rw_ctxt(rdma, ctxt, &rdma->sc_rw_ctxts);
__svc_rdma_put_rw_ctxt(ctxt, &rdma->sc_rw_ctxts);
}
/**
@ -191,6 +190,8 @@ static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc,
struct svc_rdma_rw_ctxt *ctxt;
LLIST_HEAD(free);
trace_svcrdma_cc_release(&cc->cc_cid, cc->cc_sqecount);
first = last = NULL;
while ((ctxt = svc_rdma_next_ctxt(&cc->cc_rwctxts)) != NULL) {
list_del(&ctxt->rw_list);
@ -198,7 +199,7 @@ static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc,
rdma_rw_ctx_destroy(&ctxt->rw_ctx, rdma->sc_qp,
rdma->sc_port_num, ctxt->rw_sg_table.sgl,
ctxt->rw_nents, dir);
__svc_rdma_put_rw_ctxt(rdma, ctxt, &free);
__svc_rdma_put_rw_ctxt(ctxt, &free);
ctxt->rw_node.next = first;
first = &ctxt->rw_node;
@ -234,7 +235,8 @@ svc_rdma_write_info_alloc(struct svcxprt_rdma *rdma,
{
struct svc_rdma_write_info *info;
info = kmalloc(sizeof(*info), GFP_KERNEL);
info = kmalloc_node(sizeof(*info), GFP_KERNEL,
ibdev_to_node(rdma->sc_cm_id->device));
if (!info)
return info;
@ -304,7 +306,8 @@ svc_rdma_read_info_alloc(struct svcxprt_rdma *rdma)
{
struct svc_rdma_read_info *info;
info = kmalloc(sizeof(*info), GFP_KERNEL);
info = kmalloc_node(sizeof(*info), GFP_KERNEL,
ibdev_to_node(rdma->sc_cm_id->device));
if (!info)
return info;
@ -351,8 +354,7 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc)
return;
}
/* This function sleeps when the transport's Send Queue is congested.
*
/*
* Assumptions:
* - If ib_post_send() succeeds, only one completion is expected,
* even if one or more WRs are flushed. This is true when posting
@ -367,6 +369,8 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
struct ib_cqe *cqe;
int ret;
might_sleep();
if (cc->cc_sqecount > rdma->sc_sq_depth)
return -EINVAL;

View File

@ -123,18 +123,17 @@ static void svc_rdma_send_cid_init(struct svcxprt_rdma *rdma,
static struct svc_rdma_send_ctxt *
svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
{
int node = ibdev_to_node(rdma->sc_cm_id->device);
struct svc_rdma_send_ctxt *ctxt;
dma_addr_t addr;
void *buffer;
size_t size;
int i;
size = sizeof(*ctxt);
size += rdma->sc_max_send_sges * sizeof(struct ib_sge);
ctxt = kmalloc(size, GFP_KERNEL);
ctxt = kmalloc_node(struct_size(ctxt, sc_sges, rdma->sc_max_send_sges),
GFP_KERNEL, node);
if (!ctxt)
goto fail0;
buffer = kmalloc(rdma->sc_max_req_size, GFP_KERNEL);
buffer = kmalloc_node(rdma->sc_max_req_size, GFP_KERNEL, node);
if (!buffer)
goto fail1;
addr = ib_dma_map_single(rdma->sc_pd->device, buffer,
@ -148,7 +147,6 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
ctxt->sc_send_wr.wr_cqe = &ctxt->sc_cqe;
ctxt->sc_send_wr.sg_list = ctxt->sc_sges;
ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED;
init_completion(&ctxt->sc_done);
ctxt->sc_cqe.done = svc_rdma_wc_send;
ctxt->sc_xprt_buf = buffer;
xdr_buf_init(&ctxt->sc_hdrbuf, ctxt->sc_xprt_buf,
@ -214,6 +212,7 @@ out:
ctxt->sc_send_wr.num_sge = 0;
ctxt->sc_cur_sge_no = 0;
ctxt->sc_page_count = 0;
return ctxt;
out_empty:
@ -228,6 +227,8 @@ out_empty:
* svc_rdma_send_ctxt_put - Return send_ctxt to free list
* @rdma: controlling svcxprt_rdma
* @ctxt: object to return to the free list
*
* Pages left in sc_pages are DMA unmapped and released.
*/
void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *ctxt)
@ -235,6 +236,9 @@ void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
struct ib_device *device = rdma->sc_cm_id->device;
unsigned int i;
if (ctxt->sc_page_count)
release_pages(ctxt->sc_pages, ctxt->sc_page_count);
/* The first SGE contains the transport header, which
* remains mapped until @ctxt is destroyed.
*/
@ -281,12 +285,12 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
container_of(cqe, struct svc_rdma_send_ctxt, sc_cqe);
svc_rdma_wake_send_waiters(rdma, 1);
complete(&ctxt->sc_done);
if (unlikely(wc->status != IB_WC_SUCCESS))
goto flushed;
trace_svcrdma_wc_send(wc, &ctxt->sc_cid);
svc_rdma_send_ctxt_put(rdma, ctxt);
return;
flushed:
@ -294,6 +298,7 @@ flushed:
trace_svcrdma_wc_send_err(wc, &ctxt->sc_cid);
else
trace_svcrdma_wc_send_flush(wc, &ctxt->sc_cid);
svc_rdma_send_ctxt_put(rdma, ctxt);
svc_xprt_deferred_close(&rdma->sc_xprt);
}
@ -310,7 +315,7 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt)
struct ib_send_wr *wr = &ctxt->sc_send_wr;
int ret;
reinit_completion(&ctxt->sc_done);
might_sleep();
/* Sync the transport header buffer */
ib_dma_sync_single_for_device(rdma->sc_pd->device,
@ -799,6 +804,25 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
svc_rdma_xb_dma_map, &args);
}
/* The svc_rqst and all resources it owns are released as soon as
* svc_rdma_sendto returns. Transfer pages under I/O to the ctxt
* so they are released by the Send completion handler.
*/
static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
struct svc_rdma_send_ctxt *ctxt)
{
int i, pages = rqstp->rq_next_page - rqstp->rq_respages;
ctxt->sc_page_count += pages;
for (i = 0; i < pages; i++) {
ctxt->sc_pages[i] = rqstp->rq_respages[i];
rqstp->rq_respages[i] = NULL;
}
/* Prevent svc_xprt_release from releasing pages in rq_pages */
rqstp->rq_next_page = rqstp->rq_respages;
}
/* Prepare the portion of the RPC Reply that will be transmitted
* via RDMA Send. The RPC-over-RDMA transport header is prepared
* in sc_sges[0], and the RPC xdr_buf is prepared in following sges.
@ -828,6 +852,8 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
if (ret < 0)
return ret;
svc_rdma_save_io_pages(rqstp, sctxt);
if (rctxt->rc_inv_rkey) {
sctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV;
sctxt->sc_send_wr.ex.invalidate_rkey = rctxt->rc_inv_rkey;
@ -835,13 +861,7 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
sctxt->sc_send_wr.opcode = IB_WR_SEND;
}
ret = svc_rdma_send(rdma, sctxt);
if (ret < 0)
return ret;
ret = wait_for_completion_killable(&sctxt->sc_done);
svc_rdma_send_ctxt_put(rdma, sctxt);
return ret;
return svc_rdma_send(rdma, sctxt);
}
/**
@ -907,8 +927,7 @@ void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len;
if (svc_rdma_send(rdma, sctxt))
goto put_ctxt;
wait_for_completion_killable(&sctxt->sc_done);
return;
put_ctxt:
svc_rdma_send_ctxt_put(rdma, sctxt);
@ -976,17 +995,16 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
ret = svc_rdma_send_reply_msg(rdma, sctxt, rctxt, rqstp);
if (ret < 0)
goto put_ctxt;
/* Prevent svc_xprt_release() from releasing the page backing
* rq_res.head[0].iov_base. It's no longer being accessed by
* the I/O device. */
rqstp->rq_respages++;
return 0;
reply_chunk:
if (ret != -E2BIG && ret != -EINVAL)
goto put_ctxt;
/* Send completion releases payload pages that were part
* of previously posted RDMA Writes.
*/
svc_rdma_save_io_pages(rqstp, sctxt);
svc_rdma_send_error_msg(rdma, sctxt, rctxt, ret);
return 0;

View File

@ -64,7 +64,7 @@
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
struct net *net);
struct net *net, int node);
static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
struct net *net,
struct sockaddr *sa, int salen,
@ -123,14 +123,14 @@ static void qp_event_handler(struct ib_event *event, void *context)
}
static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
struct net *net)
struct net *net, int node)
{
struct svcxprt_rdma *cma_xprt = kzalloc(sizeof *cma_xprt, GFP_KERNEL);
struct svcxprt_rdma *cma_xprt;
if (!cma_xprt) {
dprintk("svcrdma: failed to create new transport\n");
cma_xprt = kzalloc_node(sizeof(*cma_xprt), GFP_KERNEL, node);
if (!cma_xprt)
return NULL;
}
svc_xprt_init(net, &svc_rdma_class, &cma_xprt->sc_xprt, serv);
INIT_LIST_HEAD(&cma_xprt->sc_accept_q);
INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
@ -193,9 +193,9 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id,
struct svcxprt_rdma *newxprt;
struct sockaddr *sa;
/* Create a new transport */
newxprt = svc_rdma_create_xprt(listen_xprt->sc_xprt.xpt_server,
listen_xprt->sc_xprt.xpt_net);
listen_xprt->sc_xprt.xpt_net,
ibdev_to_node(new_cma_id->device));
if (!newxprt)
return;
newxprt->sc_cm_id = new_cma_id;
@ -304,7 +304,7 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
if (sa->sa_family != AF_INET && sa->sa_family != AF_INET6)
return ERR_PTR(-EAFNOSUPPORT);
cma_xprt = svc_rdma_create_xprt(serv, net);
cma_xprt = svc_rdma_create_xprt(serv, net, NUMA_NO_NODE);
if (!cma_xprt)
return ERR_PTR(-ENOMEM);
set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);