From fd891454609ec036dc23e34536e45d655b4ca4db Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 28 Apr 2015 15:41:16 +0200 Subject: [PATCH 01/24] nfsd: remove nfsd_close Signed-off-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 2 +- fs/nfsd/vfs.c | 19 +++++-------------- fs/nfsd/vfs.h | 1 - 3 files changed, 6 insertions(+), 16 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 039f9c8a95e8..86f5c273c9ec 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5505,7 +5505,7 @@ static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct __be32 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file); if (!err) { err = nfserrno(vfs_test_lock(file, lock)); - nfsd_close(file); + fput(file); } return err; } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 84d770be056e..a30e79900086 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -744,7 +744,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, host_err = ima_file_check(file, may_flags, 0); if (host_err) { - nfsd_close(file); + fput(file); goto out_nfserr; } @@ -761,15 +761,6 @@ out: return err; } -/* - * Close a file. - */ -void -nfsd_close(struct file *filp) -{ - fput(filp); -} - /* * Obtain the readahead parameters for the file * specified by (dev, ino). @@ -1040,7 +1031,7 @@ void nfsd_put_tmp_read_open(struct file *file, struct raparms *ra) ra->p_count--; spin_unlock(&rab->pb_lock); } - nfsd_close(file); + fput(file); } /* @@ -1093,7 +1084,7 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, if (cnt) err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stablep); - nfsd_close(file); + fput(file); } out: return err; @@ -1138,7 +1129,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, err = nfserr_notsupp; } - nfsd_close(file); + fput(file); out: return err; } @@ -1977,7 +1968,7 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp, if (err == nfserr_eof || err == nfserr_toosmall) err = nfs_ok; /* can still be found in ->err */ out_close: - nfsd_close(file); + fput(file); out: return err; } diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 2050cb016998..17a5e0db6a77 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -71,7 +71,6 @@ __be32 nfsd_commit(struct svc_rqst *, struct svc_fh *, #endif /* CONFIG_NFSD_V3 */ __be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t, int, struct file **); -void nfsd_close(struct file *); struct raparms; __be32 nfsd_get_tmp_read_open(struct svc_rqst *, struct svc_fh *, struct file **, struct raparms **); From 72faedae8bc3504ee4252cebf14737a23677cb8f Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Wed, 29 Apr 2015 10:38:26 -0400 Subject: [PATCH 02/24] Documentation: remove overloads-avoided counter from knfsd-stats.txt The 'overloads-avoided' counter itself was removed several years ago by commit 78c210e (Revert "knfsd: avoid overloading the CPU scheduler with enormous load averages"). Signed-off-by: Scott Mayhew Signed-off-by: J. Bruce Fields --- Documentation/filesystems/nfs/knfsd-stats.txt | 44 ++----------------- 1 file changed, 4 insertions(+), 40 deletions(-) diff --git a/Documentation/filesystems/nfs/knfsd-stats.txt b/Documentation/filesystems/nfs/knfsd-stats.txt index 64ced5149d37..1a5d82180b84 100644 --- a/Documentation/filesystems/nfs/knfsd-stats.txt +++ b/Documentation/filesystems/nfs/knfsd-stats.txt @@ -68,16 +68,10 @@ sockets-enqueued rate of change for this counter is zero; significantly non-zero values may indicate a performance limitation. - This can happen either because there are too few nfsd threads in the - thread pool for the NFS workload (the workload is thread-limited), - or because the NFS workload needs more CPU time than is available in - the thread pool (the workload is CPU-limited). In the former case, - configuring more nfsd threads will probably improve the performance - of the NFS workload. In the latter case, the sunrpc server layer is - already choosing not to wake idle nfsd threads because there are too - many nfsd threads which want to run but cannot, so configuring more - nfsd threads will make no difference whatsoever. The overloads-avoided - statistic (see below) can be used to distinguish these cases. + This can happen because there are too few nfsd threads in the thread + pool for the NFS workload (the workload is thread-limited), in which + case configuring more nfsd threads will probably improve the + performance of the NFS workload. threads-woken Counts how many times an idle nfsd thread is woken to try to @@ -88,36 +82,6 @@ threads-woken thing. The ideal rate of change for this counter will be close to but less than the rate of change of the packets-arrived counter. -overloads-avoided - Counts how many times the sunrpc server layer chose not to wake an - nfsd thread, despite the presence of idle nfsd threads, because - too many nfsd threads had been recently woken but could not get - enough CPU time to actually run. - - This statistic counts a circumstance where the sunrpc layer - heuristically avoids overloading the CPU scheduler with too many - runnable nfsd threads. The ideal rate of change for this counter - is zero. Significant non-zero values indicate that the workload - is CPU limited. Usually this is associated with heavy CPU usage - on all the CPUs in the nfsd thread pool. - - If a sustained large overloads-avoided rate is detected on a pool, - the top(1) utility should be used to check for the following - pattern of CPU usage on all the CPUs associated with the given - nfsd thread pool. - - - %us ~= 0 (as you're *NOT* running applications on your NFS server) - - - %wa ~= 0 - - - %id ~= 0 - - - %sy + %hi + %si ~= 100 - - If this pattern is seen, configuring more nfsd threads will *not* - improve the performance of the workload. If this patten is not - seen, then something more subtle is wrong. - threads-timedout Counts how many times an nfsd thread triggered an idle timeout, i.e. was not woken to handle any incoming network packets for From 43b0e7ea590b51052384ecedf4ad7c8d7898e8e7 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sun, 3 May 2015 09:16:53 +1000 Subject: [PATCH 03/24] nfsd: stop READDIRPLUS returning inconsistent attributes The NFSv3 READDIRPLUS gets some of the returned attributes from the readdir, and some from an inode returned from a new lookup. The two objects could be different thanks to intervening renames. The attributes in READDIRPLUS are optional, so let's just skip them if we notice this case. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs3xdr.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index e4b2b4322553..f6e7cbabac5a 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -805,7 +805,7 @@ encode_entry_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, static __be32 compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp, - const char *name, int namlen) + const char *name, int namlen, u64 ino) { struct svc_export *exp; struct dentry *dparent, *dchild; @@ -830,19 +830,21 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp, goto out; if (d_really_is_negative(dchild)) goto out; + if (dchild->d_inode->i_ino != ino) + goto out; rv = fh_compose(fhp, exp, dchild, &cd->fh); out: dput(dchild); return rv; } -static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen) +static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen, u64 ino) { struct svc_fh *fh = &cd->scratch; __be32 err; fh_init(fh, NFS3_FHSIZE); - err = compose_entry_fh(cd, fh, name, namlen); + err = compose_entry_fh(cd, fh, name, namlen, ino); if (err) { *p++ = 0; *p++ = 0; @@ -927,7 +929,7 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen, p = encode_entry_baggage(cd, p, name, namlen, ino); if (plus) - p = encode_entryplus_baggage(cd, p, name, namlen); + p = encode_entryplus_baggage(cd, p, name, namlen, ino); num_entry_words = p - cd->buffer; } else if (*(page+1) != NULL) { /* temporarily encode entry into next page, then move back to @@ -941,7 +943,7 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen, p1 = encode_entry_baggage(cd, p1, name, namlen, ino); if (plus) - p1 = encode_entryplus_baggage(cd, p1, name, namlen); + p1 = encode_entryplus_baggage(cd, p1, name, namlen, ino); /* determine entry word length and lengths to go in pages */ num_entry_words = p1 - tmp; From cc265089ce1b176dde963c74b53593446ee7f99a Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sat, 9 May 2015 00:37:57 +0200 Subject: [PATCH 04/24] nfsd: Disable NFSv2 timestamp workaround for NFSv3+ NFSv2 can set the atime and/or mtime of a file to specific timestamps but not to the server's current time. To implement the equivalent of utimes("file", NULL), it uses a heuristic. NFSv3 and later do support setting the atime and/or mtime to the server's current time directly. The NFSv2 heuristic is still enabled, and causes timestamps to be set wrong sometimes. Fix this by moving the heuristic into the NFSv2 specific code. We can leave it out of the create code path: the owner can always set timestamps arbitrarily, and the workaround would never trigger. Signed-off-by: Andreas Gruenbacher Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsproc.c | 52 +++++++++++++++++++++++++++++++++++++++++++++-- fs/nfsd/vfs.c | 36 -------------------------------- 2 files changed, 50 insertions(+), 38 deletions(-) diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index aecbcd34d336..4cd78ef4c95c 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -59,13 +59,61 @@ static __be32 nfsd_proc_setattr(struct svc_rqst *rqstp, struct nfsd_sattrargs *argp, struct nfsd_attrstat *resp) { + struct iattr *iap = &argp->attrs; + struct svc_fh *fhp; __be32 nfserr; + dprintk("nfsd: SETATTR %s, valid=%x, size=%ld\n", SVCFH_fmt(&argp->fh), argp->attrs.ia_valid, (long) argp->attrs.ia_size); - fh_copy(&resp->fh, &argp->fh); - nfserr = nfsd_setattr(rqstp, &resp->fh, &argp->attrs,0, (time_t)0); + fhp = fh_copy(&resp->fh, &argp->fh); + + /* + * NFSv2 does not differentiate between "set-[ac]time-to-now" + * which only requires access, and "set-[ac]time-to-X" which + * requires ownership. + * So if it looks like it might be "set both to the same time which + * is close to now", and if inode_change_ok fails, then we + * convert to "set to now" instead of "set to explicit time" + * + * We only call inode_change_ok as the last test as technically + * it is not an interface that we should be using. + */ +#define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET) +#define MAX_TOUCH_TIME_ERROR (30*60) + if ((iap->ia_valid & BOTH_TIME_SET) == BOTH_TIME_SET && + iap->ia_mtime.tv_sec == iap->ia_atime.tv_sec) { + /* + * Looks probable. + * + * Now just make sure time is in the right ballpark. + * Solaris, at least, doesn't seem to care what the time + * request is. We require it be within 30 minutes of now. + */ + time_t delta = iap->ia_atime.tv_sec - get_seconds(); + struct inode *inode; + + nfserr = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP); + if (nfserr) + goto done; + inode = d_inode(fhp->fh_dentry); + + if (delta < 0) + delta = -delta; + if (delta < MAX_TOUCH_TIME_ERROR && + inode_change_ok(inode, iap) != 0) { + /* + * Turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME. + * This will cause notify_change to set these times + * to "now" + */ + iap->ia_valid &= ~BOTH_TIME_SET; + } + } + + nfserr = nfsd_setattr(rqstp, fhp, iap, 0, (time_t)0); +done: return nfsd_return_attrs(nfserr, resp); } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index a30e79900086..b540ca789906 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -302,42 +302,6 @@ commit_metadata(struct svc_fh *fhp) static void nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap) { - /* - * NFSv2 does not differentiate between "set-[ac]time-to-now" - * which only requires access, and "set-[ac]time-to-X" which - * requires ownership. - * So if it looks like it might be "set both to the same time which - * is close to now", and if inode_change_ok fails, then we - * convert to "set to now" instead of "set to explicit time" - * - * We only call inode_change_ok as the last test as technically - * it is not an interface that we should be using. - */ -#define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET) -#define MAX_TOUCH_TIME_ERROR (30*60) - if ((iap->ia_valid & BOTH_TIME_SET) == BOTH_TIME_SET && - iap->ia_mtime.tv_sec == iap->ia_atime.tv_sec) { - /* - * Looks probable. - * - * Now just make sure time is in the right ballpark. - * Solaris, at least, doesn't seem to care what the time - * request is. We require it be within 30 minutes of now. - */ - time_t delta = iap->ia_atime.tv_sec - get_seconds(); - if (delta < 0) - delta = -delta; - if (delta < MAX_TOUCH_TIME_ERROR && - inode_change_ok(inode, iap) != 0) { - /* - * Turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME. - * This will cause notify_change to set these times - * to "now" - */ - iap->ia_valid &= ~BOTH_TIME_SET; - } - } - /* sanitize the mode change */ if (iap->ia_valid & ATTR_MODE) { iap->ia_mode &= S_IALLUGO; From 0c9d65e76a19ad7c3d7e64fd30bc66b8b6d000ee Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 24 Apr 2015 13:04:00 +0200 Subject: [PATCH 05/24] nfsd: Checking for acl support does not require fetching any acls Whether or not a file system supports acls can be determined with IS_POSIXACL(inode) and does not require trying to fetch any acls; the code for computing the supported_attrs and aclsupport attributes can be simplified. Signed-off-by: Andreas Gruenbacher Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 158badf945df..5c7eab9c12ba 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2227,7 +2227,6 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, u32 rdattr_err = 0; __be32 status; int err; - int aclsupport = 0; struct nfs4_acl *acl = NULL; void *context = NULL; int contextlen; @@ -2274,19 +2273,15 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, goto out; fhp = tempfh; } - if (bmval0 & (FATTR4_WORD0_ACL | FATTR4_WORD0_ACLSUPPORT - | FATTR4_WORD0_SUPPORTED_ATTRS)) { + if (bmval0 & FATTR4_WORD0_ACL) { err = nfsd4_get_nfs4_acl(rqstp, dentry, &acl); - aclsupport = (err == 0); - if (bmval0 & FATTR4_WORD0_ACL) { - if (err == -EOPNOTSUPP) - bmval0 &= ~FATTR4_WORD0_ACL; - else if (err == -EINVAL) { - status = nfserr_attrnotsupp; - goto out; - } else if (err != 0) - goto out_nfserr; - } + if (err == -EOPNOTSUPP) + bmval0 &= ~FATTR4_WORD0_ACL; + else if (err == -EINVAL) { + status = nfserr_attrnotsupp; + goto out; + } else if (err != 0) + goto out_nfserr; } #ifdef CONFIG_NFSD_V4_SECURITY_LABEL @@ -2338,7 +2333,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, u32 word1 = nfsd_suppattrs1(minorversion); u32 word2 = nfsd_suppattrs2(minorversion); - if (!aclsupport) + if (!IS_POSIXACL(dentry->d_inode)) word0 &= ~FATTR4_WORD0_ACL; if (!contextsupport) word2 &= ~FATTR4_WORD2_SECURITY_LABEL; @@ -2486,7 +2481,7 @@ out_acl: p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; - *p++ = cpu_to_be32(aclsupport ? + *p++ = cpu_to_be32(IS_POSIXACL(dentry->d_inode) ? ACL4_SUPPORT_ALLOW_ACL|ACL4_SUPPORT_DENY_ACL : 0); } if (bmval0 & FATTR4_WORD0_CANSETTIME) { From 6ac75368e1a658903cf57b2bbf66e60d34f55558 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 12 May 2015 23:31:29 +0200 Subject: [PATCH 06/24] nfsd: work around a gcc-5.1 warning gcc-5.0 warns about a potential uninitialized variable use in nfsd: fs/nfsd/nfs4state.c: In function 'nfsd4_process_open2': fs/nfsd/nfs4state.c:3781:3: warning: 'old_deny_bmap' may be used uninitialized in this function [-Wmaybe-uninitialized] reset_union_bmap_deny(old_deny_bmap, stp); ^ fs/nfsd/nfs4state.c:3760:16: note: 'old_deny_bmap' was declared here unsigned char old_deny_bmap; ^ This is a false positive, the code path that is warned about cannot actually be reached. This adds an initialization for the variable to make the warning go away. Signed-off-by: Arnd Bergmann Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 86f5c273c9ec..aef7c9bb6114 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3861,7 +3861,7 @@ static __be32 nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open) { __be32 status; - unsigned char old_deny_bmap; + unsigned char old_deny_bmap = stp->st_deny_bmap; if (!test_access(open->op_share_access, stp)) return nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open); @@ -3870,7 +3870,6 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *c spin_lock(&fp->fi_lock); status = nfs4_file_check_deny(fp, open->op_share_deny); if (status == nfs_ok) { - old_deny_bmap = stp->st_deny_bmap; set_deny(open->op_share_deny, stp); fp->fi_share_deny |= (open->op_share_deny & NFS4_SHARE_DENY_BOTH); From 2f6b3879c24de3404e21be8b76097353caf3bcbd Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 24 Apr 2015 13:04:29 +0200 Subject: [PATCH 07/24] nfsd: Remove dead declarations Signed-off-by: Andreas Gruenbacher Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4acl.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index 67242bf7c6cc..7e10e2a6c977 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -52,10 +52,6 @@ #define NFS4_ANYONE_MODE (NFS4_ACE_READ_ATTRIBUTES | NFS4_ACE_READ_ACL | NFS4_ACE_SYNCHRONIZE) #define NFS4_OWNER_MODE (NFS4_ACE_WRITE_ATTRIBUTES | NFS4_ACE_WRITE_ACL) -/* We don't support these bits; insist they be neither allowed nor denied */ -#define NFS4_MASK_UNSUPP (NFS4_ACE_DELETE | NFS4_ACE_WRITE_OWNER \ - | NFS4_ACE_READ_NAMED_ATTRS | NFS4_ACE_WRITE_NAMED_ATTRS) - /* flags used to simulate posix default ACLs */ #define NFS4_INHERITANCE_FLAGS (NFS4_ACE_FILE_INHERIT_ACE \ | NFS4_ACE_DIRECTORY_INHERIT_ACE) @@ -64,9 +60,6 @@ | NFS4_ACE_INHERIT_ONLY_ACE \ | NFS4_ACE_IDENTIFIER_GROUP) -#define MASK_EQUAL(mask1, mask2) \ - ( ((mask1) & NFS4_ACE_MASK_ALL) == ((mask2) & NFS4_ACE_MASK_ALL) ) - static u32 mask_from_posix(unsigned short perm, unsigned int flags) { @@ -126,11 +119,6 @@ low_mode_from_nfs4(u32 perm, unsigned short *mode, unsigned int flags) *mode |= ACL_EXECUTE; } -struct ace_container { - struct nfs4_ace *ace; - struct list_head ace_l; -}; - static short ace2type(struct nfs4_ace *); static void _posix_to_nfsv4_one(struct posix_acl *, struct nfs4_acl *, unsigned int); From e2b836cfb45d27a5efc5b1b65fe2442c53137d9c Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 22 Apr 2015 23:59:24 +0200 Subject: [PATCH 08/24] uapi/nfs: Add NFSv4.1 ACL definitions Add the ACL related protocol definitions which were added in the NFSv4.1 specification. (But we're not using them yet.) Signed-off-by: Andreas Gruenbacher Signed-off-by: J. Bruce Fields --- include/uapi/linux/nfs4.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h index adc0aff83fbb..2119c7c274d7 100644 --- a/include/uapi/linux/nfs4.h +++ b/include/uapi/linux/nfs4.h @@ -86,6 +86,10 @@ #define ACL4_SUPPORT_AUDIT_ACL 0x04 #define ACL4_SUPPORT_ALARM_ACL 0x08 +#define NFS4_ACL_AUTO_INHERIT 0x00000001 +#define NFS4_ACL_PROTECTED 0x00000002 +#define NFS4_ACL_DEFAULTED 0x00000004 + #define NFS4_ACE_FILE_INHERIT_ACE 0x00000001 #define NFS4_ACE_DIRECTORY_INHERIT_ACE 0x00000002 #define NFS4_ACE_NO_PROPAGATE_INHERIT_ACE 0x00000004 @@ -93,6 +97,7 @@ #define NFS4_ACE_SUCCESSFUL_ACCESS_ACE_FLAG 0x00000010 #define NFS4_ACE_FAILED_ACCESS_ACE_FLAG 0x00000020 #define NFS4_ACE_IDENTIFIER_GROUP 0x00000040 +#define NFS4_ACE_INHERITED_ACE 0x00000080 #define NFS4_ACE_READ_DATA 0x00000001 #define NFS4_ACE_LIST_DIRECTORY 0x00000001 @@ -106,6 +111,8 @@ #define NFS4_ACE_DELETE_CHILD 0x00000040 #define NFS4_ACE_READ_ATTRIBUTES 0x00000080 #define NFS4_ACE_WRITE_ATTRIBUTES 0x00000100 +#define NFS4_ACE_WRITE_RETENTION 0x00000200 +#define NFS4_ACE_WRITE_RETENTION_HOLD 0x00000400 #define NFS4_ACE_DELETE 0x00010000 #define NFS4_ACE_READ_ACL 0x00020000 #define NFS4_ACE_WRITE_ACL 0x00040000 From 3f87d5d6ace6c4ef1ac36ea4d1848b94e864c0c6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 26 May 2015 13:48:57 -0400 Subject: [PATCH 09/24] SUNRPC: Move EXPORT_SYMBOL for svc_process Clean up. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- net/sunrpc/svc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 78974e4d9ad2..852ae606b02a 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1290,7 +1290,6 @@ err_bad: svc_putnl(resv, ntohl(rpc_stat)); goto sendit; } -EXPORT_SYMBOL_GPL(svc_process); /* * Process the RPC request. @@ -1338,6 +1337,7 @@ out_drop: svc_drop(rqstp); return 0; } +EXPORT_SYMBOL_GPL(svc_process); #if defined(CONFIG_SUNRPC_BACKCHANNEL) /* From da7049f834c3582c1ed1a04889bda5b4121973c0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 26 May 2015 13:49:07 -0400 Subject: [PATCH 10/24] svcrdma: Remove svc_rdma_xdr_decode_deferred_req() svc_rdma_xdr_decode_deferred_req() indexes an array with an un-byte-swapped value off the wire. Fortunately this function isn't used anywhere, so simply remove it. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 1 - net/sunrpc/xprtrdma/svc_rdma_marshal.c | 56 -------------------------- 2 files changed, 57 deletions(-) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index df8edf8ec914..8ad9b6d9d4e0 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -182,7 +182,6 @@ struct svcxprt_rdma { /* svc_rdma_marshal.c */ extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *); -extern int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *); extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *, struct rpcrdma_msg *, enum rpcrdma_errcode, u32 *); diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c index b681855cf970..45e22c9549d2 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c +++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c @@ -211,62 +211,6 @@ int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req, return hdr_len; } -int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *rqstp) -{ - struct rpcrdma_msg *rmsgp = NULL; - struct rpcrdma_read_chunk *ch; - struct rpcrdma_write_array *ary; - u32 *va; - u32 hdrlen; - - dprintk("svcrdma: processing deferred RDMA header on rqstp=%p\n", - rqstp); - rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base; - - /* Pull in the extra for the padded case and bump our pointer */ - if (rmsgp->rm_type == RDMA_MSGP) { - va = &rmsgp->rm_body.rm_padded.rm_pempty[4]; - rqstp->rq_arg.head[0].iov_base = va; - hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp); - rqstp->rq_arg.head[0].iov_len -= hdrlen; - return hdrlen; - } - - /* - * Skip all chunks to find RPC msg. These were previously processed - */ - va = &rmsgp->rm_body.rm_chunks[0]; - - /* Skip read-list */ - for (ch = (struct rpcrdma_read_chunk *)va; - ch->rc_discrim != xdr_zero; ch++); - va = (u32 *)&ch->rc_position; - - /* Skip write-list */ - ary = (struct rpcrdma_write_array *)va; - if (ary->wc_discrim == xdr_zero) - va = (u32 *)&ary->wc_nchunks; - else - /* - * rs_length is the 2nd 4B field in wc_target and taking its - * address skips the list terminator - */ - va = (u32 *)&ary->wc_array[ary->wc_nchunks].wc_target.rs_length; - - /* Skip reply-array */ - ary = (struct rpcrdma_write_array *)va; - if (ary->wc_discrim == xdr_zero) - va = (u32 *)&ary->wc_nchunks; - else - va = (u32 *)&ary->wc_array[ary->wc_nchunks]; - - rqstp->rq_arg.head[0].iov_base = va; - hdrlen = (unsigned long)va - (unsigned long)rmsgp; - rqstp->rq_arg.head[0].iov_len -= hdrlen; - - return hdrlen; -} - int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, enum rpcrdma_errcode err, u32 *va) From 4399396eecfc586a1d92e64fe49c3c899f080436 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Tue, 2 Jun 2015 18:59:19 +0800 Subject: [PATCH 11/24] nfsd: Reset cb_status in nfsd4_cb_prepare() at retrying nfsd enters a infinite loop and prints message every 10 seconds: May 31 18:33:52 test-server kernel: Error sending entire callback! May 31 18:34:01 test-server kernel: Error sending entire callback! This is caused by a cb_layoutreturn getting error -10008 (NFS4ERR_DELAY), the client crashing, and then nfsd entering the infinite loop: bc_sendto --> call_timeout --> nfsd4_cb_done --> nfsd4_cb_layout_done with error -10008 --> rpc_delay(task, HZ/100) --> bc_sendto ... Reproduced using xfstests 074 with nfs client's kdump on, CONFIG_DEFAULT_HUNG_TASK_TIMEOUT set, and client's blkmapd down: 1. nfs client's write operation will get the layout of file, and then send getdeviceinfo, 2. but layout segment is not recorded by client because blkmapd is down, 3. client writes data by sending WRITE to server, 4. nfs server recalls the layout of the file before WRITE, 5. network error causes the client reset the session and return NFS4ERR_DELAY, 6. so client's WRITE operation is waiting the reply. If the task hangs 120s, the client will crash. 7. so that, the next bc_sendto will fail with TIMEOUT, and cb_status is NFS4ERR_DELAY. Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 5694cfb7a47b..8b1ac8d4f011 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -875,6 +875,7 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) u32 minorversion = clp->cl_minorversion; cb->cb_minorversion = minorversion; + cb->cb_status = 0; if (minorversion) { if (!nfsd41_cb_get_slot(clp, task)) return; From 276f03e3ba242ebf2cf201cc3c7058d2884912b7 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Tue, 2 Jun 2015 18:59:25 +0800 Subject: [PATCH 12/24] nfsd: Update callback sequnce id only CB_SEQUENCE success When testing pnfs layout, nfsd got error NFS4ERR_SEQ_MISORDERED. It is caused by nfs return NFS4ERR_DELAY before validate_seqid(), don't update the sequnce id, but nfsd updates the sequnce id !!! According to RFC5661 20.9.3, " If CB_SEQUENCE returns an error, then the state of the slot (sequence ID, cached reply) MUST NOT change. " Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 14 ++++++++++++-- fs/nfsd/state.h | 1 + 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 8b1ac8d4f011..a49201835a97 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -455,6 +455,7 @@ static int decode_cb_sequence4res(struct xdr_stream *xdr, if (unlikely(status || cb->cb_status)) return status; + cb->cb_update_seq_nr = true; return decode_cb_sequence4resok(xdr, cb); } @@ -875,6 +876,7 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) u32 minorversion = clp->cl_minorversion; cb->cb_minorversion = minorversion; + cb->cb_update_seq_nr = false; cb->cb_status = 0; if (minorversion) { if (!nfsd41_cb_get_slot(clp, task)) @@ -892,9 +894,16 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata) clp->cl_minorversion); if (clp->cl_minorversion) { - /* No need for lock, access serialized in nfsd4_cb_prepare */ - if (!task->tk_status) + /* + * No need for lock, access serialized in nfsd4_cb_prepare + * + * RFC5661 20.9.3 + * If CB_SEQUENCE returns an error, then the state of the slot + * (sequence ID, cached reply) MUST NOT change. + */ + if (cb->cb_update_seq_nr) ++clp->cl_cb_session->se_cb_seq_nr; + clear_bit(0, &clp->cl_cb_slot_busy); rpc_wake_up_next(&clp->cl_cb_waitq); dprintk("%s: freed slot, new seqid=%d\n", __func__, @@ -1091,6 +1100,7 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, cb->cb_ops = ops; INIT_WORK(&cb->cb_work, nfsd4_run_cb_work); cb->cb_status = 0; + cb->cb_update_seq_nr = false; cb->cb_need_restart = false; } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index dbc4f85a5008..4ed7c2ae95eb 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -68,6 +68,7 @@ struct nfsd4_callback { struct nfsd4_callback_ops *cb_ops; struct work_struct cb_work; int cb_status; + bool cb_update_seq_nr; bool cb_need_restart; }; From 70747c25a701b563a54c20c4a77efe8292aad151 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 4 Jun 2015 11:20:39 -0400 Subject: [PATCH 13/24] svcrdma: Fix byte-swapping in svc_rdma_sendto.c In send_write_chunks(), we have: for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0; xfer_len && chunk_no < arg_ary->wc_nchunks; chunk_no++) { . . . } Note that arg_ary->wc_nchunk is in network byte-order. For the comparison to work correctly, both have to be in native byte-order. In send_reply_chunks, we have: write_len = min(xfer_len, htonl(ch->rs_length)); xfer_len is in native byte-order, and ch->rs_length is in network byte-order. be32_to_cpu() is the correct byte swap for ch->rs_length. As an additional clean up, replace ntohl() with be32_to_cpu() in a few other places. This appears to address a problem with large rsize hangs while using PHYSICAL memory registration. I suspect that is the only registration mode that uses more than one chunk element. BugLink: https://bugzilla.linux-nfs.org/show_bug.cgi?id=248 Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 7de33d1af9b6..109e9670be8c 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -240,6 +240,7 @@ static int send_write_chunks(struct svcxprt_rdma *xprt, u32 xdr_off; int chunk_off; int chunk_no; + int nchunks; struct rpcrdma_write_array *arg_ary; struct rpcrdma_write_array *res_ary; int ret; @@ -251,14 +252,15 @@ static int send_write_chunks(struct svcxprt_rdma *xprt, &rdma_resp->rm_body.rm_chunks[1]; /* Write chunks start at the pagelist */ + nchunks = be32_to_cpu(arg_ary->wc_nchunks); for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0; - xfer_len && chunk_no < arg_ary->wc_nchunks; + xfer_len && chunk_no < nchunks; chunk_no++) { struct rpcrdma_segment *arg_ch; u64 rs_offset; arg_ch = &arg_ary->wc_array[chunk_no].wc_target; - write_len = min(xfer_len, ntohl(arg_ch->rs_length)); + write_len = min(xfer_len, be32_to_cpu(arg_ch->rs_length)); /* Prepare the response chunk given the length actually * written */ @@ -270,7 +272,7 @@ static int send_write_chunks(struct svcxprt_rdma *xprt, chunk_off = 0; while (write_len) { ret = send_write(xprt, rqstp, - ntohl(arg_ch->rs_handle), + be32_to_cpu(arg_ch->rs_handle), rs_offset + chunk_off, xdr_off, write_len, @@ -318,13 +320,13 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt, &rdma_resp->rm_body.rm_chunks[2]; /* xdr offset starts at RPC message */ - nchunks = ntohl(arg_ary->wc_nchunks); + nchunks = be32_to_cpu(arg_ary->wc_nchunks); for (xdr_off = 0, chunk_no = 0; xfer_len && chunk_no < nchunks; chunk_no++) { u64 rs_offset; ch = &arg_ary->wc_array[chunk_no].wc_target; - write_len = min(xfer_len, htonl(ch->rs_length)); + write_len = min(xfer_len, be32_to_cpu(ch->rs_length)); /* Prepare the reply chunk given the length actually * written */ @@ -335,7 +337,7 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt, chunk_off = 0; while (write_len) { ret = send_write(xprt, rqstp, - ntohl(ch->rs_handle), + be32_to_cpu(ch->rs_handle), rs_offset + chunk_off, xdr_off, write_len, From 30b7e246a6222f1fbad39b1451273375306fe1e2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 4 Jun 2015 11:21:10 -0400 Subject: [PATCH 14/24] svcrdma: Keep rpcrdma_msg fields in network byte-order Fields in struct rpcrdma_msg are __be32. Don't byte-swap these fields when decoding RPC calls and then swap them back for the reply. For the most part, they can be left alone. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 2 +- net/sunrpc/xprtrdma/svc_rdma_marshal.c | 84 +++++++++++------------- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 2 +- net/sunrpc/xprtrdma/svc_rdma_transport.c | 2 +- 4 files changed, 42 insertions(+), 48 deletions(-) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 8ad9b6d9d4e0..c03ca0a1b743 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -184,7 +184,7 @@ struct svcxprt_rdma { extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *); extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *, struct rpcrdma_msg *, - enum rpcrdma_errcode, u32 *); + enum rpcrdma_errcode, __be32 *); extern void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *, int); extern void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *, int); extern void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *, int, diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c index 45e22c9549d2..e2fca7617242 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c +++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c @@ -50,12 +50,12 @@ /* * Decodes a read chunk list. The expected format is as follows: * descrim : xdr_one - * position : u32 offset into XDR stream - * handle : u32 RKEY + * position : __be32 offset into XDR stream + * handle : __be32 RKEY * . . . * end-of-list: xdr_zero */ -static u32 *decode_read_list(u32 *va, u32 *vaend) +static __be32 *decode_read_list(__be32 *va, __be32 *vaend) { struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va; @@ -67,20 +67,20 @@ static u32 *decode_read_list(u32 *va, u32 *vaend) } ch++; } - return (u32 *)&ch->rc_position; + return &ch->rc_position; } /* * Decodes a write chunk list. The expected format is as follows: * descrim : xdr_one * nchunks : - * handle : u32 RKEY ---+ - * length : u32 | + * handle : __be32 RKEY ---+ + * length : __be32 | * offset : remove va + * . . . | * ---+ */ -static u32 *decode_write_list(u32 *va, u32 *vaend) +static __be32 *decode_write_list(__be32 *va, __be32 *vaend) { unsigned long start, end; int nchunks; @@ -90,14 +90,14 @@ static u32 *decode_write_list(u32 *va, u32 *vaend) /* Check for not write-array */ if (ary->wc_discrim == xdr_zero) - return (u32 *)&ary->wc_nchunks; + return &ary->wc_nchunks; if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) > (unsigned long)vaend) { dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend); return NULL; } - nchunks = ntohl(ary->wc_nchunks); + nchunks = be32_to_cpu(ary->wc_nchunks); start = (unsigned long)&ary->wc_array[0]; end = (unsigned long)vaend; @@ -112,10 +112,10 @@ static u32 *decode_write_list(u32 *va, u32 *vaend) * rs_length is the 2nd 4B field in wc_target and taking its * address skips the list terminator */ - return (u32 *)&ary->wc_array[nchunks].wc_target.rs_length; + return &ary->wc_array[nchunks].wc_target.rs_length; } -static u32 *decode_reply_array(u32 *va, u32 *vaend) +static __be32 *decode_reply_array(__be32 *va, __be32 *vaend) { unsigned long start, end; int nchunks; @@ -124,14 +124,14 @@ static u32 *decode_reply_array(u32 *va, u32 *vaend) /* Check for no reply-array */ if (ary->wc_discrim == xdr_zero) - return (u32 *)&ary->wc_nchunks; + return &ary->wc_nchunks; if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) > (unsigned long)vaend) { dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend); return NULL; } - nchunks = ntohl(ary->wc_nchunks); + nchunks = be32_to_cpu(ary->wc_nchunks); start = (unsigned long)&ary->wc_array[0]; end = (unsigned long)vaend; @@ -142,15 +142,14 @@ static u32 *decode_reply_array(u32 *va, u32 *vaend) ary, nchunks, vaend); return NULL; } - return (u32 *)&ary->wc_array[nchunks]; + return (__be32 *)&ary->wc_array[nchunks]; } int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req, struct svc_rqst *rqstp) { struct rpcrdma_msg *rmsgp = NULL; - u32 *va; - u32 *vaend; + __be32 *va, *vaend; u32 hdr_len; rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base; @@ -162,22 +161,17 @@ int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req, return -EINVAL; } - /* Decode the header */ - rmsgp->rm_xid = ntohl(rmsgp->rm_xid); - rmsgp->rm_vers = ntohl(rmsgp->rm_vers); - rmsgp->rm_credit = ntohl(rmsgp->rm_credit); - rmsgp->rm_type = ntohl(rmsgp->rm_type); - - if (rmsgp->rm_vers != RPCRDMA_VERSION) + if (rmsgp->rm_vers != rpcrdma_version) return -ENOSYS; /* Pull in the extra for the padded case and bump our pointer */ - if (rmsgp->rm_type == RDMA_MSGP) { + if (rmsgp->rm_type == rdma_msgp) { int hdrlen; + rmsgp->rm_body.rm_padded.rm_align = - ntohl(rmsgp->rm_body.rm_padded.rm_align); + be32_to_cpu(rmsgp->rm_body.rm_padded.rm_align); rmsgp->rm_body.rm_padded.rm_thresh = - ntohl(rmsgp->rm_body.rm_padded.rm_thresh); + be32_to_cpu(rmsgp->rm_body.rm_padded.rm_thresh); va = &rmsgp->rm_body.rm_padded.rm_pempty[4]; rqstp->rq_arg.head[0].iov_base = va; @@ -192,7 +186,7 @@ int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req, * chunk list and a reply chunk list. */ va = &rmsgp->rm_body.rm_chunks[0]; - vaend = (u32 *)((unsigned long)rmsgp + rqstp->rq_arg.len); + vaend = (__be32 *)((unsigned long)rmsgp + rqstp->rq_arg.len); va = decode_read_list(va, vaend); if (!va) return -EINVAL; @@ -213,18 +207,18 @@ int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req, int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, - enum rpcrdma_errcode err, u32 *va) + enum rpcrdma_errcode err, __be32 *va) { - u32 *startp = va; + __be32 *startp = va; - *va++ = htonl(rmsgp->rm_xid); - *va++ = htonl(rmsgp->rm_vers); - *va++ = htonl(xprt->sc_max_requests); - *va++ = htonl(RDMA_ERROR); - *va++ = htonl(err); + *va++ = rmsgp->rm_xid; + *va++ = rmsgp->rm_vers; + *va++ = cpu_to_be32(xprt->sc_max_requests); + *va++ = rdma_error; + *va++ = cpu_to_be32(err); if (err == ERR_VERS) { - *va++ = htonl(RPCRDMA_VERSION); - *va++ = htonl(RPCRDMA_VERSION); + *va++ = rpcrdma_version; + *va++ = rpcrdma_version; } return (int)((unsigned long)va - (unsigned long)startp); @@ -241,7 +235,7 @@ int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp) &rmsgp->rm_body.rm_chunks[1]; if (wr_ary->wc_discrim) wr_ary = (struct rpcrdma_write_array *) - &wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)]. + &wr_ary->wc_array[be32_to_cpu(wr_ary->wc_nchunks)]. wc_target.rs_length; else wr_ary = (struct rpcrdma_write_array *) @@ -250,7 +244,7 @@ int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp) /* skip reply array */ if (wr_ary->wc_discrim) wr_ary = (struct rpcrdma_write_array *) - &wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)]; + &wr_ary->wc_array[be32_to_cpu(wr_ary->wc_nchunks)]; else wr_ary = (struct rpcrdma_write_array *) &wr_ary->wc_nchunks; @@ -269,7 +263,7 @@ void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks) ary = (struct rpcrdma_write_array *) &rmsgp->rm_body.rm_chunks[1]; ary->wc_discrim = xdr_one; - ary->wc_nchunks = htonl(chunks); + ary->wc_nchunks = cpu_to_be32(chunks); /* write-list terminator */ ary->wc_array[chunks].wc_target.rs_handle = xdr_zero; @@ -282,7 +276,7 @@ void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *ary, int chunks) { ary->wc_discrim = xdr_one; - ary->wc_nchunks = htonl(chunks); + ary->wc_nchunks = cpu_to_be32(chunks); } void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary, @@ -294,7 +288,7 @@ void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary, struct rpcrdma_segment *seg = &ary->wc_array[chunk_no].wc_target; seg->rs_handle = rs_handle; seg->rs_offset = rs_offset; - seg->rs_length = htonl(write_len); + seg->rs_length = cpu_to_be32(write_len); } void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt, @@ -302,10 +296,10 @@ void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rdma_resp, enum rpcrdma_proc rdma_type) { - rdma_resp->rm_xid = htonl(rdma_argp->rm_xid); - rdma_resp->rm_vers = htonl(rdma_argp->rm_vers); - rdma_resp->rm_credit = htonl(xprt->sc_max_requests); - rdma_resp->rm_type = htonl(rdma_type); + rdma_resp->rm_xid = rdma_argp->rm_xid; + rdma_resp->rm_vers = rdma_argp->rm_vers; + rdma_resp->rm_credit = cpu_to_be32(xprt->sc_max_requests); + rdma_resp->rm_type = cpu_to_be32(rdma_type); /* Encode chunks lists */ rdma_resp->rm_body.rm_chunks[0] = xdr_zero; diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index f9f13a32ddb8..ac93ce01a729 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -85,7 +85,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, /* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */ rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base; - if (be32_to_cpu(rmsgp->rm_type) == RDMA_NOMSG) + if (rmsgp->rm_type == rdma_nomsg) rqstp->rq_arg.pages = &rqstp->rq_pages[0]; else rqstp->rq_arg.pages = &rqstp->rq_pages[1]; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index f609c1c2d38d..be084938e3ff 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -1319,7 +1319,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, struct ib_send_wr err_wr; struct page *p; struct svc_rdma_op_ctxt *ctxt; - u32 *va; + __be32 *va; int length; int ret; From b7e0b9a965a116341b4ef86ab98ea2843b218271 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 4 Jun 2015 11:21:20 -0400 Subject: [PATCH 15/24] svcrdma: Replace GFP_KERNEL in a loop with GFP_NOFAIL At the 2015 LSF/MM, it was requested that memory allocation call sites that request GFP_KERNEL allocations in a loop should be annotated with __GFP_NOFAIL. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 1 - net/sunrpc/xprtrdma/svc_rdma_sendto.c | 2 +- net/sunrpc/xprtrdma/svc_rdma_transport.c | 32 +++++------------------- 3 files changed, 7 insertions(+), 28 deletions(-) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index c03ca0a1b743..d26384b22126 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -211,7 +211,6 @@ extern int svc_rdma_sendto(struct svc_rqst *); extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *); extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *, enum rpcrdma_errcode); -struct page *svc_rdma_get_page(void); extern int svc_rdma_post_recv(struct svcxprt_rdma *); extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *); extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *); diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 109e9670be8c..d25cd430f9ff 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -517,7 +517,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) inline_bytes = rqstp->rq_res.len; /* Create the RDMA response header */ - res_page = svc_rdma_get_page(); + res_page = alloc_page(GFP_KERNEL | __GFP_NOFAIL); rdma_resp = page_address(res_page); reply_ary = svc_rdma_get_reply_array(rdma_argp); if (reply_ary) diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index be084938e3ff..1ed4740ff0b5 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -99,12 +99,8 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) { struct svc_rdma_op_ctxt *ctxt; - while (1) { - ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep, GFP_KERNEL); - if (ctxt) - break; - schedule_timeout_uninterruptible(msecs_to_jiffies(500)); - } + ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep, + GFP_KERNEL | __GFP_NOFAIL); ctxt->xprt = xprt; INIT_LIST_HEAD(&ctxt->dto_q); ctxt->count = 0; @@ -156,12 +152,8 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages) struct svc_rdma_req_map *svc_rdma_get_req_map(void) { struct svc_rdma_req_map *map; - while (1) { - map = kmem_cache_alloc(svc_rdma_map_cachep, GFP_KERNEL); - if (map) - break; - schedule_timeout_uninterruptible(msecs_to_jiffies(500)); - } + map = kmem_cache_alloc(svc_rdma_map_cachep, + GFP_KERNEL | __GFP_NOFAIL); map->count = 0; return map; } @@ -490,18 +482,6 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, return cma_xprt; } -struct page *svc_rdma_get_page(void) -{ - struct page *page; - - while ((page = alloc_page(GFP_KERNEL)) == NULL) { - /* If we can't get memory, wait a bit and try again */ - printk(KERN_INFO "svcrdma: out of memory...retrying in 1s\n"); - schedule_timeout_uninterruptible(msecs_to_jiffies(1000)); - } - return page; -} - int svc_rdma_post_recv(struct svcxprt_rdma *xprt) { struct ib_recv_wr recv_wr, *bad_recv_wr; @@ -520,7 +500,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) pr_err("svcrdma: Too many sges (%d)\n", sge_no); goto err_put_ctxt; } - page = svc_rdma_get_page(); + page = alloc_page(GFP_KERNEL | __GFP_NOFAIL); ctxt->pages[sge_no] = page; pa = ib_dma_map_page(xprt->sc_cm_id->device, page, 0, PAGE_SIZE, @@ -1323,7 +1303,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, int length; int ret; - p = svc_rdma_get_page(); + p = alloc_page(GFP_KERNEL | __GFP_NOFAIL); va = page_address(p); /* XDR encode error */ From 0380a3f37540ad0582b3c749a74fc127af914689 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 4 Jun 2015 11:21:32 -0400 Subject: [PATCH 16/24] svcrdma: Add a separate "max data segs macro for svcrdma The server and client maximum are architecturally independent. Allow changing one without affecting the other. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 7 +++++++ net/sunrpc/xprtrdma/svc_rdma_transport.c | 2 +- net/sunrpc/xprtrdma/xprt_rdma.h | 6 ------ 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index d26384b22126..cb94ee4181d4 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -172,6 +172,13 @@ struct svcxprt_rdma { #define RDMAXPRT_SQ_PENDING 2 #define RDMAXPRT_CONN_PENDING 3 +#define RPCRDMA_MAX_SVC_SEGS (64) /* server max scatter/gather */ +#if RPCSVC_MAXPAYLOAD < (RPCRDMA_MAX_SVC_SEGS << PAGE_SHIFT) +#define RPCRDMA_MAXPAYLOAD RPCSVC_MAXPAYLOAD +#else +#define RPCRDMA_MAXPAYLOAD (RPCRDMA_MAX_SVC_SEGS << PAGE_SHIFT) +#endif + #define RPCRDMA_LISTEN_BACKLOG 10 /* The default ORD value is based on two outstanding full-size writes with a * page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ. */ diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 1ed4740ff0b5..3b4c2ff66a2f 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -91,7 +91,7 @@ struct svc_xprt_class svc_rdma_class = { .xcl_name = "rdma", .xcl_owner = THIS_MODULE, .xcl_ops = &svc_rdma_ops, - .xcl_max_payload = RPCSVC_MAXPAYLOAD_RDMA, + .xcl_max_payload = RPCRDMA_MAXPAYLOAD, .xcl_ident = XPRT_TRANSPORT_RDMA, }; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 78e0b8beaa36..e60907b0e1f1 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -487,10 +487,4 @@ extern struct kmem_cache *svc_rdma_ctxt_cachep; /* Workqueue created in svc_rdma.c */ extern struct workqueue_struct *svc_rdma_wq; -#if RPCSVC_MAXPAYLOAD < (RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT) -#define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD -#else -#define RPCSVC_MAXPAYLOAD_RDMA (RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT) -#endif - #endif /* _LINUX_SUNRPC_XPRT_RDMA_H */ From ffe1f0df586237a18f9b568597bddabb56e96d5e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 4 Jun 2015 11:21:42 -0400 Subject: [PATCH 17/24] rpcrdma: Merge svcrdma and xprtrdma modules into one Bi-directional RPC support means code in svcrdma.ko invokes a bit of code in xprtrdma.ko, and vice versa. To avoid loader/linker loops, merge the server and client side modules together into a single module. When backchannel capabilities are added, the combined module will register all needed transport capabilities so that Upper Layer consumers automatically have everything needed to create a bi-directional transport connection. Module aliases are added for backwards compatibility with user space, which still may expect svcrdma.ko or xprtrdma.ko to be present. This commit reverts commit 2e8c12e1b765 ("xprtrdma: add separate Kconfig options for NFSoRDMA client and server support") and provides a single CONFIG option for enabling the new module. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- net/sunrpc/Kconfig | 28 ++++++-------------- net/sunrpc/Makefile | 3 +-- net/sunrpc/xprtrdma/Makefile | 14 +++++----- net/sunrpc/xprtrdma/module.c | 46 +++++++++++++++++++++++++++++++++ net/sunrpc/xprtrdma/svc_rdma.c | 8 +----- net/sunrpc/xprtrdma/transport.c | 13 ++-------- net/sunrpc/xprtrdma/xprt_rdma.h | 5 ++++ 7 files changed, 69 insertions(+), 48 deletions(-) create mode 100644 net/sunrpc/xprtrdma/module.c diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index 9068e72aa73c..04ce2c0b660e 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -48,28 +48,16 @@ config SUNRPC_DEBUG If unsure, say Y. -config SUNRPC_XPRT_RDMA_CLIENT - tristate "RPC over RDMA Client Support" +config SUNRPC_XPRT_RDMA + tristate "RPC-over-RDMA transport" depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS default SUNRPC && INFINIBAND help - This option allows the NFS client to support an RDMA-enabled - transport. + This option allows the NFS client and server to use RDMA + transports (InfiniBand, iWARP, or RoCE). - To compile RPC client RDMA transport support as a module, - choose M here: the module will be called xprtrdma. + To compile this support as a module, choose M. The module + will be called rpcrdma.ko. - If unsure, say N. - -config SUNRPC_XPRT_RDMA_SERVER - tristate "RPC over RDMA Server Support" - depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS - default SUNRPC && INFINIBAND - help - This option allows the NFS server to support an RDMA-enabled - transport. - - To compile RPC server RDMA transport support as a module, - choose M here: the module will be called svcrdma. - - If unsure, say N. + If unsure, or you know there is no RDMA capability on your + hardware platform, say N. diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index 15e6f6c23c5d..936ad0a15371 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -5,8 +5,7 @@ obj-$(CONFIG_SUNRPC) += sunrpc.o obj-$(CONFIG_SUNRPC_GSS) += auth_gss/ - -obj-y += xprtrdma/ +obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ auth.o auth_null.o auth_unix.o auth_generic.o \ diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile index 579f72bbcf4b..48913de240bd 100644 --- a/net/sunrpc/xprtrdma/Makefile +++ b/net/sunrpc/xprtrdma/Makefile @@ -1,9 +1,7 @@ -obj-$(CONFIG_SUNRPC_XPRT_RDMA_CLIENT) += xprtrdma.o +obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o -xprtrdma-y := transport.o rpc_rdma.o verbs.o \ - fmr_ops.o frwr_ops.o physical_ops.o - -obj-$(CONFIG_SUNRPC_XPRT_RDMA_SERVER) += svcrdma.o - -svcrdma-y := svc_rdma.o svc_rdma_transport.o \ - svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o +rpcrdma-y := transport.o rpc_rdma.o verbs.o \ + fmr_ops.o frwr_ops.o physical_ops.o \ + svc_rdma.o svc_rdma_transport.o \ + svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o \ + module.o diff --git a/net/sunrpc/xprtrdma/module.c b/net/sunrpc/xprtrdma/module.c new file mode 100644 index 000000000000..560712bd9fa2 --- /dev/null +++ b/net/sunrpc/xprtrdma/module.c @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2015 Oracle. All rights reserved. + */ + +/* rpcrdma.ko module initialization + */ + +#include +#include +#include +#include "xprt_rdma.h" + +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) +# define RPCDBG_FACILITY RPCDBG_TRANS +#endif + +MODULE_AUTHOR("Open Grid Computing and Network Appliance, Inc."); +MODULE_DESCRIPTION("RPC/RDMA Transport"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_ALIAS("svcrdma"); +MODULE_ALIAS("xprtrdma"); + +static void __exit rpc_rdma_cleanup(void) +{ + xprt_rdma_cleanup(); + svc_rdma_cleanup(); +} + +static int __init rpc_rdma_init(void) +{ + int rc; + + rc = svc_rdma_init(); + if (rc) + goto out; + + rc = xprt_rdma_init(); + if (rc) + svc_rdma_cleanup(); + +out: + return rc; +} + +module_init(rpc_rdma_init); +module_exit(rpc_rdma_cleanup); diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c index c1b6270262c2..2cd252f023a5 100644 --- a/net/sunrpc/xprtrdma/svc_rdma.c +++ b/net/sunrpc/xprtrdma/svc_rdma.c @@ -38,8 +38,7 @@ * * Author: Tom Tucker */ -#include -#include + #include #include #include @@ -295,8 +294,3 @@ int svc_rdma_init(void) destroy_workqueue(svc_rdma_wq); return -ENOMEM; } -MODULE_AUTHOR("Tom Tucker "); -MODULE_DESCRIPTION("SVC RDMA Transport"); -MODULE_LICENSE("Dual BSD/GPL"); -module_init(svc_rdma_init); -module_exit(svc_rdma_cleanup); diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 54f23b1be986..436da2caec95 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -48,7 +48,6 @@ */ #include -#include #include #include #include @@ -59,11 +58,6 @@ # define RPCDBG_FACILITY RPCDBG_TRANS #endif -MODULE_LICENSE("Dual BSD/GPL"); - -MODULE_DESCRIPTION("RPC/RDMA Transport for Linux kernel NFS"); -MODULE_AUTHOR("Network Appliance, Inc."); - /* * tunables */ @@ -711,7 +705,7 @@ static struct xprt_class xprt_rdma = { .setup = xprt_setup_rdma, }; -static void __exit xprt_rdma_cleanup(void) +void xprt_rdma_cleanup(void) { int rc; @@ -728,7 +722,7 @@ static void __exit xprt_rdma_cleanup(void) __func__, rc); } -static int __init xprt_rdma_init(void) +int xprt_rdma_init(void) { int rc; @@ -753,6 +747,3 @@ static int __init xprt_rdma_init(void) #endif return 0; } - -module_init(xprt_rdma_init); -module_exit(xprt_rdma_cleanup); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index e60907b0e1f1..58163b88738c 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -480,6 +480,11 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *); */ int rpcrdma_marshal_req(struct rpc_rqst *); +/* RPC/RDMA module init - xprtrdma/transport.c + */ +int xprt_rdma_init(void); +void xprt_rdma_cleanup(void); + /* Temporary NFS request map cache. Created in svc_rdma.c */ extern struct kmem_cache *svc_rdma_map_cachep; /* WR context cache. Created in svc_rdma.c */ From 97b1f9aae963cc0b229ef8147db4782170564d4f Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Fri, 12 Jun 2015 18:58:57 +0200 Subject: [PATCH 18/24] nfsd: use swap() in sort_pacl_range() Use kernel.h macro definition. Thanks to Julia Lawall for Coccinelle scripting support. Signed-off-by: Fabian Frederick Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4acl.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index 7e10e2a6c977..eb5accf1b37f 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -372,7 +372,6 @@ pace_gt(struct posix_acl_entry *pace1, struct posix_acl_entry *pace2) static void sort_pacl_range(struct posix_acl *pacl, int start, int end) { int sorted = 0, i; - struct posix_acl_entry tmp; /* We just do a bubble sort; easy to do in place, and we're not * expecting acl's to be long enough to justify anything more. */ @@ -382,9 +381,8 @@ sort_pacl_range(struct posix_acl *pacl, int start, int end) { if (pace_gt(&pacl->a_entries[i], &pacl->a_entries[i+1])) { sorted = 0; - tmp = pacl->a_entries[i]; - pacl->a_entries[i] = pacl->a_entries[i+1]; - pacl->a_entries[i+1] = tmp; + swap(pacl->a_entries[i], + pacl->a_entries[i + 1]); } } } From e749a4621e414c36a54ac8b3205955e267f00db3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 18 Jun 2015 16:44:58 +0200 Subject: [PATCH 19/24] nfsd: clean up raparams handling Refactor the raparam hash helpers to just deal with the raparms, and keep opening/closing files separate from that. Signed-off-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 24 +++++++++++------ fs/nfsd/vfs.c | 68 +++++++++++++++++------------------------------ fs/nfsd/vfs.h | 6 ++--- 3 files changed, 43 insertions(+), 55 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 5c7eab9c12ba..5286206169bc 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -33,6 +33,7 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include #include #include #include @@ -3419,7 +3420,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, struct file *file = read->rd_filp; struct svc_fh *fhp = read->rd_fhp; int starting_len = xdr->buf->len; - struct raparms *ra; + struct raparms *ra = NULL; __be32 *p; __be32 err; @@ -3441,23 +3442,30 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, maxcount = min_t(unsigned long, maxcount, (xdr->buf->buflen - xdr->buf->len)); maxcount = min_t(unsigned long, maxcount, read->rd_length); - if (read->rd_filp) + if (read->rd_filp) { err = nfsd_permission(resp->rqstp, fhp->fh_export, fhp->fh_dentry, NFSD_MAY_READ|NFSD_MAY_OWNER_OVERRIDE); - else - err = nfsd_get_tmp_read_open(resp->rqstp, read->rd_fhp, - &file, &ra); - if (err) - goto err_truncate; + if (err) + goto err_truncate; + } else { + err = nfsd_open(resp->rqstp, fhp, S_IFREG, NFSD_MAY_READ, + &file); + if (err) + goto err_truncate; + + ra = nfsd_init_raparms(file); + } if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) err = nfsd4_encode_splice_read(resp, read, file, maxcount); else err = nfsd4_encode_readv(resp, read, file, maxcount); + if (ra) + nfsd_put_raparams(file, ra); if (!read->rd_filp) - nfsd_put_tmp_read_open(file, ra); + fput(file); err_truncate: if (err) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index b540ca789906..52f2dd3898c4 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -725,14 +725,12 @@ out: return err; } -/* - * Obtain the readahead parameters for the file - * specified by (dev, ino). - */ - -static inline struct raparms * -nfsd_get_raparms(dev_t dev, ino_t ino) +struct raparms * +nfsd_init_raparms(struct file *file) { + struct inode *inode = file_inode(file); + dev_t dev = inode->i_sb->s_dev; + ino_t ino = inode->i_ino; struct raparms *ra, **rap, **frap = NULL; int depth = 0; unsigned int hash; @@ -769,9 +767,23 @@ found: ra->p_count++; nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++; spin_unlock(&rab->pb_lock); + + if (ra->p_set) + file->f_ra = ra->p_ra; return ra; } +void nfsd_put_raparams(struct file *file, struct raparms *ra) +{ + struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex]; + + spin_lock(&rab->pb_lock); + ra->p_ra = file->f_ra; + ra->p_set = 1; + ra->p_count--; + spin_unlock(&rab->pb_lock); +} + /* * Grab and keep cached pages associated with a file in the svc_rqst * so that they can be passed to the network sendmsg/sendpage routines @@ -964,40 +976,6 @@ out_nfserr: return err; } -__be32 nfsd_get_tmp_read_open(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct file **file, struct raparms **ra) -{ - struct inode *inode; - __be32 err; - - err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, file); - if (err) - return err; - - inode = file_inode(*file); - - /* Get readahead parameters */ - *ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino); - - if (*ra && (*ra)->p_set) - (*file)->f_ra = (*ra)->p_ra; - return nfs_ok; -} - -void nfsd_put_tmp_read_open(struct file *file, struct raparms *ra) -{ - /* Write back readahead params */ - if (ra) { - struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex]; - spin_lock(&rab->pb_lock); - ra->p_ra = file->f_ra; - ra->p_set = 1; - ra->p_count--; - spin_unlock(&rab->pb_lock); - } - fput(file); -} - /* * Read data from a file. count must contain the requested read count * on entry. On return, *count contains the number of bytes actually read. @@ -1010,13 +988,15 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct raparms *ra; __be32 err; - err = nfsd_get_tmp_read_open(rqstp, fhp, &file, &ra); + err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file); if (err) return err; + ra = nfsd_init_raparms(file); err = nfsd_vfs_read(rqstp, file, offset, vec, vlen, count); - - nfsd_put_tmp_read_open(file, ra); + if (ra) + nfsd_put_raparams(file, ra); + fput(file); return err; } diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 17a5e0db6a77..053c4addbd9d 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -72,9 +72,6 @@ __be32 nfsd_commit(struct svc_rqst *, struct svc_fh *, __be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t, int, struct file **); struct raparms; -__be32 nfsd_get_tmp_read_open(struct svc_rqst *, struct svc_fh *, - struct file **, struct raparms **); -void nfsd_put_tmp_read_open(struct file *, struct raparms *); __be32 nfsd_splice_read(struct svc_rqst *, struct file *, loff_t, unsigned long *); __be32 nfsd_readv(struct file *, loff_t, struct kvec *, int, @@ -103,6 +100,9 @@ __be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *, __be32 nfsd_permission(struct svc_rqst *, struct svc_export *, struct dentry *, int); +struct raparms *nfsd_init_raparms(struct file *file); +void nfsd_put_raparams(struct file *file, struct raparms *ra); + static inline int fh_want_write(struct svc_fh *fh) { int ret = mnt_want_write(fh->fh_export->ex_path.mnt); From a0649b2d3fffb1cde8745568c767f3a55a3462bc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 18 Jun 2015 16:44:59 +0200 Subject: [PATCH 20/24] nfsd: refactor nfs4_preprocess_stateid_op Split out two self contained helpers to make the function more readable. Signed-off-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 97 ++++++++++++++++++++++++--------------------- 1 file changed, 52 insertions(+), 45 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index aef7c9bb6114..f8d50817fafc 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4573,20 +4573,51 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, return nfs_ok; } +static struct file * +nfs4_find_file(struct nfs4_stid *s, int flags) +{ + switch (s->sc_type) { + case NFS4_DELEG_STID: + if (WARN_ON_ONCE(!s->sc_file->fi_deleg_file)) + return NULL; + return get_file(s->sc_file->fi_deleg_file); + case NFS4_OPEN_STID: + case NFS4_LOCK_STID: + if (flags & RD_STATE) + return find_readable_file(s->sc_file); + else + return find_writeable_file(s->sc_file); + break; + } + + return NULL; +} + +static __be32 +nfs4_check_olstateid(struct svc_fh *fhp, struct nfs4_ol_stateid *ols, int flags) +{ + __be32 status; + + status = nfs4_check_fh(fhp, ols); + if (status) + return status; + status = nfsd4_check_openowner_confirmed(ols); + if (status) + return status; + return nfs4_check_openmode(ols, flags); +} + /* -* Checks for stateid operations -*/ + * Checks for stateid operations + */ __be32 nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, stateid_t *stateid, int flags, struct file **filpp) { - struct nfs4_stid *s; - struct nfs4_ol_stateid *stp = NULL; - struct nfs4_delegation *dp = NULL; - struct svc_fh *current_fh = &cstate->current_fh; - struct inode *ino = d_inode(current_fh->fh_dentry); + struct svc_fh *fhp = &cstate->current_fh; + struct inode *ino = d_inode(fhp->fh_dentry); struct nfsd_net *nn = net_generic(net, nfsd_net_id); - struct file *file = NULL; + struct nfs4_stid *s; __be32 status; if (filpp) @@ -4596,60 +4627,36 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, return nfserr_grace; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) - return check_special_stateids(net, current_fh, stateid, flags); + return check_special_stateids(net, fhp, stateid, flags); status = nfsd4_lookup_stateid(cstate, stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, &s, nn); if (status) return status; - status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate)); + status = check_stateid_generation(stateid, &s->sc_stateid, + nfsd4_has_session(cstate)); if (status) goto out; + switch (s->sc_type) { case NFS4_DELEG_STID: - dp = delegstateid(s); - status = nfs4_check_delegmode(dp, flags); - if (status) - goto out; - if (filpp) { - file = dp->dl_stid.sc_file->fi_deleg_file; - if (!file) { - WARN_ON_ONCE(1); - status = nfserr_serverfault; - goto out; - } - get_file(file); - } + status = nfs4_check_delegmode(delegstateid(s), flags); break; case NFS4_OPEN_STID: case NFS4_LOCK_STID: - stp = openlockstateid(s); - status = nfs4_check_fh(current_fh, stp); - if (status) - goto out; - status = nfsd4_check_openowner_confirmed(stp); - if (status) - goto out; - status = nfs4_check_openmode(stp, flags); - if (status) - goto out; - if (filpp) { - struct nfs4_file *fp = stp->st_stid.sc_file; - - if (flags & RD_STATE) - file = find_readable_file(fp); - else - file = find_writeable_file(fp); - } + status = nfs4_check_olstateid(fhp, openlockstateid(s), flags); break; default: status = nfserr_bad_stateid; - goto out; + break; + } + + if (!status && filpp) { + *filpp = nfs4_find_file(s, flags); + if (!*filpp) + status = nfserr_serverfault; } - status = nfs_ok; - if (file) - *filpp = file; out: nfs4_put_stid(s); return status; From af90f707fa6d54dbb725c4b919c976cd23cd07f2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 18 Jun 2015 16:45:00 +0200 Subject: [PATCH 21/24] nfsd: take struct file setup fully into nfs4_preprocess_stateid_op This patch changes nfs4_preprocess_stateid_op so it always returns a valid struct file if it has been asked for that. For that we now allocate a temporary struct file for special stateids, and check permissions if we got the file structure from the stateid. This ensures that all callers will get their handling of special stateids right, and avoids code duplication. There is a little wart in here because the read code needs to know if we allocated a file structure so that it can copy around the read-ahead parameters. In the long run we should probably aim to cache full file structures used with special stateids instead. Signed-off-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 39 ++++++++++++----------------- fs/nfsd/nfs4state.c | 60 ++++++++++++++++++++++++++++++++++++--------- fs/nfsd/nfs4xdr.c | 19 +------------- fs/nfsd/state.h | 6 ++--- fs/nfsd/vfs.c | 7 +----- fs/nfsd/vfs.h | 4 +++ fs/nfsd/xdr4.h | 1 + 7 files changed, 74 insertions(+), 62 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 864e2003e8de..5aa7c4e7fc8c 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -760,8 +760,6 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { __be32 status; - /* no need to check permission - this will be done in nfsd_read() */ - read->rd_filp = NULL; if (read->rd_offset >= OFFSET_MAX) return nfserr_inval; @@ -778,9 +776,9 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); /* check stateid */ - if ((status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), - cstate, &read->rd_stateid, - RD_STATE, &read->rd_filp))) { + status = nfs4_preprocess_stateid_op(rqstp, cstate, &read->rd_stateid, + RD_STATE, &read->rd_filp, &read->rd_tmp_file); + if (status) { dprintk("NFSD: nfsd4_read: couldn't process stateid!\n"); goto out; } @@ -924,8 +922,8 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, int err; if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { - status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate, - &setattr->sa_stateid, WR_STATE, NULL); + status = nfs4_preprocess_stateid_op(rqstp, cstate, + &setattr->sa_stateid, WR_STATE, NULL, NULL); if (status) { dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n"); return status; @@ -986,13 +984,11 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, unsigned long cnt; int nvecs; - /* no need to check permission - this will be done in nfsd_write() */ - if (write->wr_offset >= OFFSET_MAX) return nfserr_inval; - status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), - cstate, stateid, WR_STATE, &filp); + status = nfs4_preprocess_stateid_op(rqstp, cstate, stateid, WR_STATE, + &filp, NULL); if (status) { dprintk("NFSD: nfsd4_write: couldn't process stateid!\n"); return status; @@ -1005,11 +1001,10 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nvecs = fill_in_write_vector(rqstp->rq_vec, write); WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec)); - status = nfsd_write(rqstp, &cstate->current_fh, filp, - write->wr_offset, rqstp->rq_vec, nvecs, - &cnt, &write->wr_how_written); - if (filp) - fput(filp); + status = nfsd_vfs_write(rqstp, &cstate->current_fh, filp, + write->wr_offset, rqstp->rq_vec, nvecs, &cnt, + &write->wr_how_written); + fput(filp); write->wr_bytes_written = cnt; @@ -1023,15 +1018,13 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, __be32 status = nfserr_notsupp; struct file *file; - status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate, + status = nfs4_preprocess_stateid_op(rqstp, cstate, &fallocate->falloc_stateid, - WR_STATE, &file); + WR_STATE, &file, NULL); if (status != nfs_ok) { dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n"); return status; } - if (!file) - return nfserr_bad_stateid; status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, file, fallocate->falloc_offset, @@ -1064,15 +1057,13 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, __be32 status; struct file *file; - status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate, + status = nfs4_preprocess_stateid_op(rqstp, cstate, &seek->seek_stateid, - RD_STATE, &file); + RD_STATE, &file, NULL); if (status) { dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n"); return status; } - if (!file) - return nfserr_bad_stateid; switch (seek->seek_whence) { case NFS4_CONTENT_DATA: diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index f8d50817fafc..61dfb33f0559 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4576,6 +4576,9 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, static struct file * nfs4_find_file(struct nfs4_stid *s, int flags) { + if (!s) + return NULL; + switch (s->sc_type) { case NFS4_DELEG_STID: if (WARN_ON_ONCE(!s->sc_file->fi_deleg_file)) @@ -4607,27 +4610,63 @@ nfs4_check_olstateid(struct svc_fh *fhp, struct nfs4_ol_stateid *ols, int flags) return nfs4_check_openmode(ols, flags); } +static __be32 +nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s, + struct file **filpp, bool *tmp_file, int flags) +{ + int acc = (flags & RD_STATE) ? NFSD_MAY_READ : NFSD_MAY_WRITE; + struct file *file; + __be32 status; + + file = nfs4_find_file(s, flags); + if (file) { + status = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, + acc | NFSD_MAY_OWNER_OVERRIDE); + if (status) { + fput(file); + return status; + } + + *filpp = file; + } else { + status = nfsd_open(rqstp, fhp, S_IFREG, acc, filpp); + if (status) + return status; + + if (tmp_file) + *tmp_file = true; + } + + return 0; +} + /* * Checks for stateid operations */ __be32 -nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, - stateid_t *stateid, int flags, struct file **filpp) +nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, stateid_t *stateid, + int flags, struct file **filpp, bool *tmp_file) { struct svc_fh *fhp = &cstate->current_fh; struct inode *ino = d_inode(fhp->fh_dentry); + struct net *net = SVC_NET(rqstp); struct nfsd_net *nn = net_generic(net, nfsd_net_id); - struct nfs4_stid *s; + struct nfs4_stid *s = NULL; __be32 status; if (filpp) *filpp = NULL; + if (tmp_file) + *tmp_file = false; if (grace_disallows_io(net, ino)) return nfserr_grace; - if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) - return check_special_stateids(net, fhp, stateid, flags); + if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) { + status = check_special_stateids(net, fhp, stateid, flags); + goto done; + } status = nfsd4_lookup_stateid(cstate, stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, @@ -4652,13 +4691,12 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, break; } - if (!status && filpp) { - *filpp = nfs4_find_file(s, flags); - if (!*filpp) - status = nfserr_serverfault; - } +done: + if (!status && filpp) + status = nfs4_check_file(rqstp, fhp, s, filpp, tmp_file, flags); out: - nfs4_put_stid(s); + if (s) + nfs4_put_stid(s); return status; } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 5286206169bc..3b3b3fbd9319 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -33,7 +33,6 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include #include #include #include @@ -3418,7 +3417,6 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, unsigned long maxcount; struct xdr_stream *xdr = &resp->xdr; struct file *file = read->rd_filp; - struct svc_fh *fhp = read->rd_fhp; int starting_len = xdr->buf->len; struct raparms *ra = NULL; __be32 *p; @@ -3442,20 +3440,8 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, maxcount = min_t(unsigned long, maxcount, (xdr->buf->buflen - xdr->buf->len)); maxcount = min_t(unsigned long, maxcount, read->rd_length); - if (read->rd_filp) { - err = nfsd_permission(resp->rqstp, fhp->fh_export, - fhp->fh_dentry, - NFSD_MAY_READ|NFSD_MAY_OWNER_OVERRIDE); - if (err) - goto err_truncate; - } else { - err = nfsd_open(resp->rqstp, fhp, S_IFREG, NFSD_MAY_READ, - &file); - if (err) - goto err_truncate; - + if (read->rd_tmp_file) ra = nfsd_init_raparms(file); - } if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) err = nfsd4_encode_splice_read(resp, read, file, maxcount); @@ -3464,10 +3450,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, if (ra) nfsd_put_raparams(file, ra); - if (!read->rd_filp) - fput(file); -err_truncate: if (err) xdr_truncate_encode(xdr, starting_len); return err; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 4ed7c2ae95eb..4874ce515fc1 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -583,9 +583,9 @@ enum nfsd4_cb_op { struct nfsd4_compound_state; struct nfsd_net; -extern __be32 nfs4_preprocess_stateid_op(struct net *net, - struct nfsd4_compound_state *cstate, - stateid_t *stateid, int flags, struct file **filp); +extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, stateid_t *stateid, + int flags, struct file **filp, bool *tmp_file); __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid, unsigned char typemask, struct nfs4_stid **s, struct nfsd_net *nn); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 52f2dd3898c4..b5e077a6e7d4 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -502,16 +502,11 @@ __be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, loff_t len, int flags) { - __be32 err; int error; if (!S_ISREG(file_inode(file)->i_mode)) return nfserr_inval; - err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, NFSD_MAY_WRITE); - if (err) - return err; - error = vfs_fallocate(file, flags, offset, len); if (!error) error = commit_metadata(fhp); @@ -912,7 +907,7 @@ static int wait_for_concurrent_writes(struct file *file) return err; } -static __be32 +__be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt, int *stablep) diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 053c4addbd9d..5be875e3e638 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -80,6 +80,10 @@ __be32 nfsd_read(struct svc_rqst *, struct svc_fh *, loff_t, struct kvec *, int, unsigned long *); __be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *, loff_t, struct kvec *,int, unsigned long *, int *); +__be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct file *file, loff_t offset, + struct kvec *vec, int vlen, unsigned long *cnt, + int *stablep); __be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *, char *, int *); __be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *, diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 2f8c092be2b3..9f991007a578 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -273,6 +273,7 @@ struct nfsd4_read { u32 rd_length; /* request */ int rd_vlen; struct file *rd_filp; + bool rd_tmp_file; struct svc_rqst *rd_rqstp; /* response */ struct svc_fh * rd_fhp; /* response */ From 96bcad506457cfa0c26680446eedefb616c6b079 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 18 Jun 2015 16:45:01 +0200 Subject: [PATCH 22/24] nfsd: fput rd_file from XDR encode context Remove the hack where we fput the read-specific file in generic code. Instead we can do it in nfsd4_encode_read as that gets called for all error cases as well. Signed-off-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 4 ---- fs/nfsd/nfs4xdr.c | 22 ++++++++++++++-------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 5aa7c4e7fc8c..90cfda75313c 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1723,10 +1723,6 @@ encode_op: be32_to_cpu(status)); nfsd4_cstate_clear_replay(cstate); - /* XXX Ugh, we need to get rid of this kind of special case: */ - if (op->opnum == OP_READ && op->u.read.rd_filp) - fput(op->u.read.rd_filp); - nfsd4_increment_op_stats(op->opnum); } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 3b3b3fbd9319..ca09fec7a4bf 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -33,6 +33,7 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include #include #include #include @@ -3420,19 +3421,20 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, int starting_len = xdr->buf->len; struct raparms *ra = NULL; __be32 *p; - __be32 err; if (nfserr) - return nfserr; + goto out; p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */ if (!p) { WARN_ON_ONCE(test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)); - return nfserr_resource; + nfserr = nfserr_resource; + goto out; } if (resp->xdr.buf->page_len && test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) { WARN_ON_ONCE(1); - return nfserr_resource; + nfserr = nfserr_resource; + goto out; } xdr_commit_encode(xdr); @@ -3444,16 +3446,20 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, ra = nfsd_init_raparms(file); if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) - err = nfsd4_encode_splice_read(resp, read, file, maxcount); + nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount); else - err = nfsd4_encode_readv(resp, read, file, maxcount); + nfserr = nfsd4_encode_readv(resp, read, file, maxcount); if (ra) nfsd_put_raparams(file, ra); - if (err) + if (nfserr) xdr_truncate_encode(xdr, starting_len); - return err; + +out: + if (file) + fput(file); + return nfserr; } static __be32 From 68e8bb0334dbad55285573682c38d8f6664fce68 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 18 Jun 2015 16:45:02 +0200 Subject: [PATCH 23/24] nfsd: wrap too long lines in nfsd4_encode_read Signed-off-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index ca09fec7a4bf..54633858733a 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3431,7 +3431,8 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, nfserr = nfserr_resource; goto out; } - if (resp->xdr.buf->page_len && test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) { + if (resp->xdr.buf->page_len && + test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) { WARN_ON_ONCE(1); nfserr = nfserr_resource; goto out; @@ -3439,13 +3440,15 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, xdr_commit_encode(xdr); maxcount = svc_max_payload(resp->rqstp); - maxcount = min_t(unsigned long, maxcount, (xdr->buf->buflen - xdr->buf->len)); + maxcount = min_t(unsigned long, maxcount, + (xdr->buf->buflen - xdr->buf->len)); maxcount = min_t(unsigned long, maxcount, read->rd_length); if (read->rd_tmp_file) ra = nfsd_init_raparms(file); - if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) + if (file->f_op->splice_read && + test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount); else nfserr = nfsd4_encode_readv(resp, read, file, maxcount); From 901f1379f6c9dc2d73b51971d129a6f7d5b9b20a Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Tue, 16 Jun 2015 21:57:52 +0200 Subject: [PATCH 24/24] sunrpc: use sg_init_one() in krb5_rc4_setup_enc/seq_key() Don't opencode sg_init_one() Signed-off-by: Fabian Frederick Signed-off-by: J. Bruce Fields --- net/sunrpc/auth_gss/gss_krb5_crypto.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index b5408e8a37f2..fee3c15a4b52 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -881,9 +881,7 @@ krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher, if (err) goto out_err; - sg_init_table(sg, 1); - sg_set_buf(sg, &zeroconstant, 4); - + sg_init_one(sg, &zeroconstant, 4); err = crypto_hash_digest(&desc, sg, 4, Kseq); if (err) goto out_err; @@ -951,9 +949,7 @@ krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher, if (err) goto out_err; - sg_init_table(sg, 1); - sg_set_buf(sg, zeroconstant, 4); - + sg_init_one(sg, zeroconstant, 4); err = crypto_hash_digest(&desc, sg, 4, Kcrypt); if (err) goto out_err;