Merge branch 'for-3.16' of git://linux-nfs.org/~bfields/linux

Pull nfsd updates from Bruce Fields:
 "The largest piece is a long-overdue rewrite of the xdr code to remove
  some annoying limitations: for example, there was no way to return
  ACLs larger than 4K, and readdir results were returned only in 4K
  chunks, limiting performance on large directories.

  Also:
        - part of Neil Brown's work to make NFS work reliably over the
          loopback interface (so client and server can run on the same
          machine without deadlocks).  The rest of it is coming through
          other trees.
        - cleanup and bugfixes for some of the server RDMA code, from
          Steve Wise.
        - Various cleanups of the NFSv4 state code in preparation for an
          overhaul of the locking, from Jeff, Trond, and Benny.
        - smaller bugfixes and cleanup from Christoph Hellwig and
          Kinglong Mee.

  Thanks to everyone!

  This summer looks likely to be busier than usual for knfsd.  Hopefully
  we won't break it too badly; testing is definitely welcome"

* 'for-3.16' of git://linux-nfs.org/~bfields/linux: (100 commits)
  nfsd4: fix FREE_STATEID lockowner leak
  svcrdma: Fence LOCAL_INV work requests
  svcrdma: refactor marshalling logic
  nfsd: don't halt scanning the DRC LRU list when there's an RC_INPROG entry
  nfs4: remove unused CHANGE_SECURITY_LABEL
  nfsd4: kill READ64
  nfsd4: kill READ32
  nfsd4: simplify server xdr->next_page use
  nfsd4: hash deleg stateid only on successful nfs4_set_delegation
  nfsd4: rename recall_lock to state_lock
  nfsd: remove unneeded zeroing of fields in nfsd4_proc_compound
  nfsd: fix setting of NFS4_OO_CONFIRMED in nfsd4_open
  nfsd4: use recall_lock for delegation hashing
  nfsd: fix laundromat next-run-time calculation
  nfsd: make nfsd4_encode_fattr static
  SUNRPC/NFSD: Remove using of dprintk with KERN_WARNING
  nfsd: remove unused function nfsd_read_file
  nfsd: getattr for FATTR4_WORD0_FILES_AVAIL needs the statfs buffer
  NFSD: Error out when getting more than one fsloc/secinfo/uuid
  NFSD: Using type of uint32_t for ex_nflavors instead of int
  ...
This commit is contained in:
Linus Torvalds 2014-06-10 11:50:57 -07:00
commit 5b174fd647
53 changed files with 2298 additions and 2032 deletions

View File

@ -176,7 +176,5 @@ Nonstandard compound limitations:
ca_maxrequestsize request and a ca_maxresponsesize reply, so we may ca_maxrequestsize request and a ca_maxresponsesize reply, so we may
fail to live up to the promise we made in CREATE_SESSION fore channel fail to live up to the promise we made in CREATE_SESSION fore channel
negotiation. negotiation.
* No more than one read-like operation allowed per compound; encoding
replies that cross page boundaries (except for read data) not handled.
See also http://wiki.linux-nfs.org/wiki/index.php/Server_4.0_and_4.1_issues. See also http://wiki.linux-nfs.org/wiki/index.php/Server_4.0_and_4.1_issues.

View File

@ -14,6 +14,8 @@
#include <linux/sunrpc/stats.h> #include <linux/sunrpc/stats.h>
#include <linux/lockd/lockd.h> #include <linux/lockd/lockd.h>
#include <uapi/linux/nfs3.h>
#define NLMDBG_FACILITY NLMDBG_XDR #define NLMDBG_FACILITY NLMDBG_XDR
#if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ) #if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ)

View File

@ -15,6 +15,8 @@
#include <linux/sunrpc/stats.h> #include <linux/sunrpc/stats.h>
#include <linux/lockd/lockd.h> #include <linux/lockd/lockd.h>
#include <uapi/linux/nfs2.h>
#define NLMDBG_FACILITY NLMDBG_XDR #define NLMDBG_FACILITY NLMDBG_XDR
#if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ) #if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ)

View File

@ -622,8 +622,8 @@ static int __init init_nlm(void)
err_pernet: err_pernet:
#ifdef CONFIG_SYSCTL #ifdef CONFIG_SYSCTL
unregister_sysctl_table(nlm_sysctl_table); unregister_sysctl_table(nlm_sysctl_table);
#endif
err_sysctl: err_sysctl:
#endif
return err; return err;
} }

View File

@ -14,12 +14,11 @@
#include <linux/mutex.h> #include <linux/mutex.h>
#include <linux/sunrpc/svc.h> #include <linux/sunrpc/svc.h>
#include <linux/sunrpc/addr.h> #include <linux/sunrpc/addr.h>
#include <linux/nfsd/nfsfh.h>
#include <linux/nfsd/export.h>
#include <linux/lockd/lockd.h> #include <linux/lockd/lockd.h>
#include <linux/lockd/share.h> #include <linux/lockd/share.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/mount.h> #include <linux/mount.h>
#include <uapi/linux/nfs2.h>
#define NLMDBG_FACILITY NLMDBG_SVCSUBS #define NLMDBG_FACILITY NLMDBG_SVCSUBS

View File

@ -16,6 +16,8 @@
#include <linux/sunrpc/stats.h> #include <linux/sunrpc/stats.h>
#include <linux/lockd/lockd.h> #include <linux/lockd/lockd.h>
#include <uapi/linux/nfs2.h>
#define NLMDBG_FACILITY NLMDBG_XDR #define NLMDBG_FACILITY NLMDBG_XDR

View File

@ -2750,7 +2750,7 @@ static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync)
#define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL) #define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL)
#define FATTR4_WORD2_NFS41_MASK (2*FATTR4_WORD2_SUPPATTR_EXCLCREAT - 1UL) #define FATTR4_WORD2_NFS41_MASK (2*FATTR4_WORD2_SUPPATTR_EXCLCREAT - 1UL)
#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_CHANGE_SECURITY_LABEL - 1UL) #define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_SECURITY_LABEL - 1UL)
static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
{ {

View File

@ -49,7 +49,7 @@ struct svc_rqst;
struct nfs4_acl *nfs4_acl_new(int); struct nfs4_acl *nfs4_acl_new(int);
int nfs4_acl_get_whotype(char *, u32); int nfs4_acl_get_whotype(char *, u32);
__be32 nfs4_acl_write_who(int who, __be32 **p, int *len); __be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who);
int nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, int nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry,
struct nfs4_acl **acl); struct nfs4_acl **acl);

View File

@ -1,7 +1,6 @@
/* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */ /* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/user_namespace.h>
#include "nfsd.h" #include "nfsd.h"
#include "auth.h" #include "auth.h"
@ -25,7 +24,6 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
struct cred *new; struct cred *new;
int i; int i;
int flags = nfsexp_flags(rqstp, exp); int flags = nfsexp_flags(rqstp, exp);
int ret;
validate_process_creds(); validate_process_creds();
@ -86,8 +84,7 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
return 0; return 0;
oom: oom:
ret = -ENOMEM;
abort_creds(new); abort_creds(new);
return ret; return -ENOMEM;
} }

View File

@ -17,17 +17,12 @@
#include <linux/exportfs.h> #include <linux/exportfs.h>
#include <linux/sunrpc/svc_xprt.h> #include <linux/sunrpc/svc_xprt.h>
#include <net/ipv6.h>
#include "nfsd.h" #include "nfsd.h"
#include "nfsfh.h" #include "nfsfh.h"
#include "netns.h" #include "netns.h"
#define NFSDDBG_FACILITY NFSDDBG_EXPORT #define NFSDDBG_FACILITY NFSDDBG_EXPORT
typedef struct auth_domain svc_client;
typedef struct svc_export svc_export;
/* /*
* We have two caches. * We have two caches.
* One maps client+vfsmnt+dentry to export options - the export map * One maps client+vfsmnt+dentry to export options - the export map
@ -73,7 +68,7 @@ static struct svc_expkey *svc_expkey_lookup(struct cache_detail *cd, struct svc_
static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
{ {
/* client fsidtype fsid [path] */ /* client fsidtype fsid expiry [path] */
char *buf; char *buf;
int len; int len;
struct auth_domain *dom = NULL; struct auth_domain *dom = NULL;
@ -295,13 +290,19 @@ svc_expkey_update(struct cache_detail *cd, struct svc_expkey *new,
static void nfsd4_fslocs_free(struct nfsd4_fs_locations *fsloc) static void nfsd4_fslocs_free(struct nfsd4_fs_locations *fsloc)
{ {
struct nfsd4_fs_location *locations = fsloc->locations;
int i; int i;
if (!locations)
return;
for (i = 0; i < fsloc->locations_count; i++) { for (i = 0; i < fsloc->locations_count; i++) {
kfree(fsloc->locations[i].path); kfree(locations[i].path);
kfree(fsloc->locations[i].hosts); kfree(locations[i].hosts);
} }
kfree(fsloc->locations);
kfree(locations);
fsloc->locations = NULL;
} }
static void svc_export_put(struct kref *ref) static void svc_export_put(struct kref *ref)
@ -388,6 +389,10 @@ fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc)
int len; int len;
int migrated, i, err; int migrated, i, err;
/* more than one fsloc */
if (fsloc->locations)
return -EINVAL;
/* listsize */ /* listsize */
err = get_uint(mesg, &fsloc->locations_count); err = get_uint(mesg, &fsloc->locations_count);
if (err) if (err)
@ -437,13 +442,18 @@ out_free_all:
static int secinfo_parse(char **mesg, char *buf, struct svc_export *exp) static int secinfo_parse(char **mesg, char *buf, struct svc_export *exp)
{ {
int listsize, err;
struct exp_flavor_info *f; struct exp_flavor_info *f;
u32 listsize;
int err;
err = get_int(mesg, &listsize); /* more than one secinfo */
if (exp->ex_nflavors)
return -EINVAL;
err = get_uint(mesg, &listsize);
if (err) if (err)
return err; return err;
if (listsize < 0 || listsize > MAX_SECINFO_LIST) if (listsize > MAX_SECINFO_LIST)
return -EINVAL; return -EINVAL;
for (f = exp->ex_flavors; f < exp->ex_flavors + listsize; f++) { for (f = exp->ex_flavors; f < exp->ex_flavors + listsize; f++) {
@ -474,6 +484,27 @@ static inline int
secinfo_parse(char **mesg, char *buf, struct svc_export *exp) { return 0; } secinfo_parse(char **mesg, char *buf, struct svc_export *exp) { return 0; }
#endif #endif
/*
 * Parse the argument of a "uuid" export option.
 *
 * @mesg:  cursor into the message being parsed; handed to qword_get()
 * @buf:   scratch buffer that qword_get() fills with the next word
 * @puuid: where the duplicated uuid is stored; must still be NULL on entry
 *
 * Returns 0 on success, -EINVAL when a uuid was already recorded in *puuid
 * or when qword_get() does not report exactly EX_UUID_LEN bytes, and
 * -ENOMEM when kmemdup() fails (in which case *puuid is left NULL).
 */
static inline int
uuid_parse(char **mesg, char *buf, unsigned char **puuid)
{
	unsigned char *copy;

	/* a second "uuid" word for the same export is rejected */
	if (*puuid != NULL)
		return -EINVAL;

	/* expect a 16 byte uuid encoded as \xXXXX... */
	if (qword_get(mesg, buf, PAGE_SIZE) != EX_UUID_LEN)
		return -EINVAL;

	copy = kmemdup(buf, EX_UUID_LEN, GFP_KERNEL);
	*puuid = copy;

	return copy ? 0 : -ENOMEM;
}
static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
{ {
/* client path expiry [flags anonuid anongid fsid] */ /* client path expiry [flags anonuid anongid fsid] */
@ -552,18 +583,9 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
while ((len = qword_get(&mesg, buf, PAGE_SIZE)) > 0) { while ((len = qword_get(&mesg, buf, PAGE_SIZE)) > 0) {
if (strcmp(buf, "fsloc") == 0) if (strcmp(buf, "fsloc") == 0)
err = fsloc_parse(&mesg, buf, &exp.ex_fslocs); err = fsloc_parse(&mesg, buf, &exp.ex_fslocs);
else if (strcmp(buf, "uuid") == 0) { else if (strcmp(buf, "uuid") == 0)
/* expect a 16 byte uuid encoded as \xXXXX... */ err = uuid_parse(&mesg, buf, &exp.ex_uuid);
len = qword_get(&mesg, buf, PAGE_SIZE); else if (strcmp(buf, "secinfo") == 0)
if (len != 16)
err = -EINVAL;
else {
exp.ex_uuid =
kmemdup(buf, 16, GFP_KERNEL);
if (exp.ex_uuid == NULL)
err = -ENOMEM;
}
} else if (strcmp(buf, "secinfo") == 0)
err = secinfo_parse(&mesg, buf, &exp); err = secinfo_parse(&mesg, buf, &exp);
else else
/* quietly ignore unknown words and anything /* quietly ignore unknown words and anything
@ -649,7 +671,7 @@ static int svc_export_show(struct seq_file *m,
if (exp->ex_uuid) { if (exp->ex_uuid) {
int i; int i;
seq_puts(m, ",uuid="); seq_puts(m, ",uuid=");
for (i=0; i<16; i++) { for (i = 0; i < EX_UUID_LEN; i++) {
if ((i&3) == 0 && i) if ((i&3) == 0 && i)
seq_putc(m, ':'); seq_putc(m, ':');
seq_printf(m, "%02x", exp->ex_uuid[i]); seq_printf(m, "%02x", exp->ex_uuid[i]);
@ -771,7 +793,7 @@ svc_export_update(struct svc_export *new, struct svc_export *old)
static struct svc_expkey * static struct svc_expkey *
exp_find_key(struct cache_detail *cd, svc_client *clp, int fsid_type, exp_find_key(struct cache_detail *cd, struct auth_domain *clp, int fsid_type,
u32 *fsidv, struct cache_req *reqp) u32 *fsidv, struct cache_req *reqp)
{ {
struct svc_expkey key, *ek; struct svc_expkey key, *ek;
@ -793,8 +815,8 @@ exp_find_key(struct cache_detail *cd, svc_client *clp, int fsid_type,
return ek; return ek;
} }
static struct svc_export *
static svc_export *exp_get_by_name(struct cache_detail *cd, svc_client *clp, exp_get_by_name(struct cache_detail *cd, struct auth_domain *clp,
const struct path *path, struct cache_req *reqp) const struct path *path, struct cache_req *reqp)
{ {
struct svc_export *exp, key; struct svc_export *exp, key;
@ -819,11 +841,11 @@ static svc_export *exp_get_by_name(struct cache_detail *cd, svc_client *clp,
/* /*
* Find the export entry for a given dentry. * Find the export entry for a given dentry.
*/ */
static struct svc_export *exp_parent(struct cache_detail *cd, svc_client *clp, static struct svc_export *
struct path *path) exp_parent(struct cache_detail *cd, struct auth_domain *clp, struct path *path)
{ {
struct dentry *saved = dget(path->dentry); struct dentry *saved = dget(path->dentry);
svc_export *exp = exp_get_by_name(cd, clp, path, NULL); struct svc_export *exp = exp_get_by_name(cd, clp, path, NULL);
while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path->dentry)) { while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path->dentry)) {
struct dentry *parent = dget_parent(path->dentry); struct dentry *parent = dget_parent(path->dentry);
@ -844,7 +866,7 @@ static struct svc_export *exp_parent(struct cache_detail *cd, svc_client *clp,
* since it's harder to fool a kernel module than a user space program. * since it's harder to fool a kernel module than a user space program.
*/ */
int int
exp_rootfh(struct net *net, svc_client *clp, char *name, exp_rootfh(struct net *net, struct auth_domain *clp, char *name,
struct knfsd_fh *f, int maxsize) struct knfsd_fh *f, int maxsize)
{ {
struct svc_export *exp; struct svc_export *exp;

View File

@ -1,17 +1,16 @@
/* /*
* include/linux/nfsd/export.h
*
* Public declarations for NFS exports. The definitions for the
* syscall interface are in nfsctl.h
*
* Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de> * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de>
*/ */
#ifndef NFSD_EXPORT_H #ifndef NFSD_EXPORT_H
#define NFSD_EXPORT_H #define NFSD_EXPORT_H
# include <linux/nfsd/nfsfh.h> #include <linux/sunrpc/cache.h>
#include <uapi/linux/nfsd/export.h> #include <uapi/linux/nfsd/export.h>
struct knfsd_fh;
struct svc_fh;
struct svc_rqst;
/* /*
* FS Locations * FS Locations
*/ */
@ -38,6 +37,7 @@ struct nfsd4_fs_locations {
* spkm3i, and spkm3p (and using all 8 at once should be rare). * spkm3i, and spkm3p (and using all 8 at once should be rare).
*/ */
#define MAX_SECINFO_LIST 8 #define MAX_SECINFO_LIST 8
#define EX_UUID_LEN 16
struct exp_flavor_info { struct exp_flavor_info {
u32 pseudoflavor; u32 pseudoflavor;
@ -54,7 +54,7 @@ struct svc_export {
int ex_fsid; int ex_fsid;
unsigned char * ex_uuid; /* 16 byte fsid */ unsigned char * ex_uuid; /* 16 byte fsid */
struct nfsd4_fs_locations ex_fslocs; struct nfsd4_fs_locations ex_fslocs;
int ex_nflavors; uint32_t ex_nflavors;
struct exp_flavor_info ex_flavors[MAX_SECINFO_LIST]; struct exp_flavor_info ex_flavors[MAX_SECINFO_LIST];
struct cache_detail *cd; struct cache_detail *cd;
}; };

View File

@ -97,25 +97,14 @@ static ssize_t fault_inject_read(struct file *file, char __user *buf,
{ {
static u64 val; static u64 val;
char read_buf[25]; char read_buf[25];
size_t size, ret; size_t size;
loff_t pos = *ppos; loff_t pos = *ppos;
if (!pos) if (!pos)
nfsd_inject_get(file_inode(file)->i_private, &val); nfsd_inject_get(file_inode(file)->i_private, &val);
size = scnprintf(read_buf, sizeof(read_buf), "%llu\n", val); size = scnprintf(read_buf, sizeof(read_buf), "%llu\n", val);
if (pos < 0) return simple_read_from_buffer(buf, len, ppos, read_buf, size);
return -EINVAL;
if (pos >= size || !len)
return 0;
if (len > size - pos)
len = size - pos;
ret = copy_to_user(buf, read_buf + pos, len);
if (ret == len)
return -EFAULT;
len -= ret;
*ppos = pos + len;
return len;
} }
static ssize_t fault_inject_write(struct file *file, const char __user *buf, static ssize_t fault_inject_write(struct file *file, const char __user *buf,

View File

@ -56,7 +56,7 @@ static inline void nfsd_idmap_shutdown(struct net *net)
__be32 nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, kuid_t *); __be32 nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, kuid_t *);
__be32 nfsd_map_name_to_gid(struct svc_rqst *, const char *, size_t, kgid_t *); __be32 nfsd_map_name_to_gid(struct svc_rqst *, const char *, size_t, kgid_t *);
__be32 nfsd4_encode_user(struct svc_rqst *, kuid_t, __be32 **, int *); __be32 nfsd4_encode_user(struct xdr_stream *, struct svc_rqst *, kuid_t);
__be32 nfsd4_encode_group(struct svc_rqst *, kgid_t, __be32 **, int *); __be32 nfsd4_encode_group(struct xdr_stream *, struct svc_rqst *, kgid_t);
#endif /* LINUX_NFSD_IDMAP_H */ #endif /* LINUX_NFSD_IDMAP_H */

View File

@ -182,7 +182,8 @@ static __be32 nfsacld_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessarg
static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p, static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd3_getaclargs *argp) struct nfsd3_getaclargs *argp)
{ {
if (!(p = nfs2svc_decode_fh(p, &argp->fh))) p = nfs2svc_decode_fh(p, &argp->fh);
if (!p)
return 0; return 0;
argp->mask = ntohl(*p); p++; argp->mask = ntohl(*p); p++;
@ -197,7 +198,8 @@ static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p,
unsigned int base; unsigned int base;
int n; int n;
if (!(p = nfs2svc_decode_fh(p, &argp->fh))) p = nfs2svc_decode_fh(p, &argp->fh);
if (!p)
return 0; return 0;
argp->mask = ntohl(*p++); argp->mask = ntohl(*p++);
if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) || if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) ||
@ -218,7 +220,8 @@ static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p,
static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p, static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd_fhandle *argp) struct nfsd_fhandle *argp)
{ {
if (!(p = nfs2svc_decode_fh(p, &argp->fh))) p = nfs2svc_decode_fh(p, &argp->fh);
if (!p)
return 0; return 0;
return xdr_argsize_check(rqstp, p); return xdr_argsize_check(rqstp, p);
} }
@ -226,7 +229,8 @@ static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p,
static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p, static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd3_accessargs *argp) struct nfsd3_accessargs *argp)
{ {
if (!(p = nfs2svc_decode_fh(p, &argp->fh))) p = nfs2svc_decode_fh(p, &argp->fh);
if (!p)
return 0; return 0;
argp->access = ntohl(*p++); argp->access = ntohl(*p++);

View File

@ -128,7 +128,8 @@ out:
static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p, static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd3_getaclargs *args) struct nfsd3_getaclargs *args)
{ {
if (!(p = nfs3svc_decode_fh(p, &args->fh))) p = nfs3svc_decode_fh(p, &args->fh);
if (!p)
return 0; return 0;
args->mask = ntohl(*p); p++; args->mask = ntohl(*p); p++;
@ -143,7 +144,8 @@ static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p,
unsigned int base; unsigned int base;
int n; int n;
if (!(p = nfs3svc_decode_fh(p, &args->fh))) p = nfs3svc_decode_fh(p, &args->fh);
if (!p)
return 0; return 0;
args->mask = ntohl(*p++); args->mask = ntohl(*p++);
if (args->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) || if (args->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) ||

View File

@ -278,7 +278,8 @@ void fill_post_wcc(struct svc_fh *fhp)
int int
nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *args) nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *args)
{ {
if (!(p = decode_fh(p, &args->fh))) p = decode_fh(p, &args->fh);
if (!p)
return 0; return 0;
return xdr_argsize_check(rqstp, p); return xdr_argsize_check(rqstp, p);
} }
@ -287,7 +288,8 @@ int
nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p, nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd3_sattrargs *args) struct nfsd3_sattrargs *args)
{ {
if (!(p = decode_fh(p, &args->fh))) p = decode_fh(p, &args->fh);
if (!p)
return 0; return 0;
p = decode_sattr3(p, &args->attrs); p = decode_sattr3(p, &args->attrs);
@ -315,7 +317,8 @@ int
nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p, nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd3_accessargs *args) struct nfsd3_accessargs *args)
{ {
if (!(p = decode_fh(p, &args->fh))) p = decode_fh(p, &args->fh);
if (!p)
return 0; return 0;
args->access = ntohl(*p++); args->access = ntohl(*p++);
@ -330,7 +333,8 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
int v; int v;
u32 max_blocksize = svc_max_payload(rqstp); u32 max_blocksize = svc_max_payload(rqstp);
if (!(p = decode_fh(p, &args->fh))) p = decode_fh(p, &args->fh);
if (!p)
return 0; return 0;
p = xdr_decode_hyper(p, &args->offset); p = xdr_decode_hyper(p, &args->offset);
@ -360,7 +364,8 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
unsigned int len, v, hdr, dlen; unsigned int len, v, hdr, dlen;
u32 max_blocksize = svc_max_payload(rqstp); u32 max_blocksize = svc_max_payload(rqstp);
if (!(p = decode_fh(p, &args->fh))) p = decode_fh(p, &args->fh);
if (!p)
return 0; return 0;
p = xdr_decode_hyper(p, &args->offset); p = xdr_decode_hyper(p, &args->offset);
@ -535,7 +540,8 @@ int
nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd3_readlinkargs *args) struct nfsd3_readlinkargs *args)
{ {
if (!(p = decode_fh(p, &args->fh))) p = decode_fh(p, &args->fh);
if (!p)
return 0; return 0;
args->buffer = page_address(*(rqstp->rq_next_page++)); args->buffer = page_address(*(rqstp->rq_next_page++));
@ -558,7 +564,8 @@ int
nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p, nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd3_readdirargs *args) struct nfsd3_readdirargs *args)
{ {
if (!(p = decode_fh(p, &args->fh))) p = decode_fh(p, &args->fh);
if (!p)
return 0; return 0;
p = xdr_decode_hyper(p, &args->cookie); p = xdr_decode_hyper(p, &args->cookie);
args->verf = p; p += 2; args->verf = p; p += 2;
@ -580,7 +587,8 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p,
int len; int len;
u32 max_blocksize = svc_max_payload(rqstp); u32 max_blocksize = svc_max_payload(rqstp);
if (!(p = decode_fh(p, &args->fh))) p = decode_fh(p, &args->fh);
if (!p)
return 0; return 0;
p = xdr_decode_hyper(p, &args->cookie); p = xdr_decode_hyper(p, &args->cookie);
args->verf = p; p += 2; args->verf = p; p += 2;
@ -605,7 +613,8 @@ int
nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p, nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd3_commitargs *args) struct nfsd3_commitargs *args)
{ {
if (!(p = decode_fh(p, &args->fh))) p = decode_fh(p, &args->fh);
if (!p)
return 0; return 0;
p = xdr_decode_hyper(p, &args->offset); p = xdr_decode_hyper(p, &args->offset);
args->count = ntohl(*p++); args->count = ntohl(*p++);

View File

@ -36,7 +36,6 @@
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/nfs_fs.h> #include <linux/nfs_fs.h>
#include <linux/export.h>
#include "nfsfh.h" #include "nfsfh.h"
#include "nfsd.h" #include "nfsd.h"
#include "acl.h" #include "acl.h"
@ -920,20 +919,19 @@ nfs4_acl_get_whotype(char *p, u32 len)
return NFS4_ACL_WHO_NAMED; return NFS4_ACL_WHO_NAMED;
} }
__be32 nfs4_acl_write_who(int who, __be32 **p, int *len) __be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who)
{ {
__be32 *p;
int i; int i;
int bytes;
for (i = 0; i < ARRAY_SIZE(s2t_map); i++) { for (i = 0; i < ARRAY_SIZE(s2t_map); i++) {
if (s2t_map[i].type != who) if (s2t_map[i].type != who)
continue; continue;
bytes = 4 + (XDR_QUADLEN(s2t_map[i].stringlen) << 2); p = xdr_reserve_space(xdr, s2t_map[i].stringlen + 4);
if (bytes > *len) if (!p)
return nfserr_resource; return nfserr_resource;
*p = xdr_encode_opaque(*p, s2t_map[i].string, p = xdr_encode_opaque(p, s2t_map[i].string,
s2t_map[i].stringlen); s2t_map[i].stringlen);
*len -= bytes;
return 0; return 0;
} }
WARN_ON_ONCE(1); WARN_ON_ONCE(1);

View File

@ -551,44 +551,43 @@ idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen
return 0; return 0;
} }
static __be32 encode_ascii_id(u32 id, __be32 **p, int *buflen) static __be32 encode_ascii_id(struct xdr_stream *xdr, u32 id)
{ {
char buf[11]; char buf[11];
int len; int len;
int bytes; __be32 *p;
len = sprintf(buf, "%u", id); len = sprintf(buf, "%u", id);
bytes = 4 + (XDR_QUADLEN(len) << 2); p = xdr_reserve_space(xdr, len + 4);
if (bytes > *buflen) if (!p)
return nfserr_resource; return nfserr_resource;
*p = xdr_encode_opaque(*p, buf, len); p = xdr_encode_opaque(p, buf, len);
*buflen -= bytes;
return 0; return 0;
} }
static __be32 idmap_id_to_name(struct svc_rqst *rqstp, int type, u32 id, __be32 **p, int *buflen) static __be32 idmap_id_to_name(struct xdr_stream *xdr,
struct svc_rqst *rqstp, int type, u32 id)
{ {
struct ent *item, key = { struct ent *item, key = {
.id = id, .id = id,
.type = type, .type = type,
}; };
__be32 *p;
int ret; int ret;
int bytes;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname)); strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname));
ret = idmap_lookup(rqstp, idtoname_lookup, &key, nn->idtoname_cache, &item); ret = idmap_lookup(rqstp, idtoname_lookup, &key, nn->idtoname_cache, &item);
if (ret == -ENOENT) if (ret == -ENOENT)
return encode_ascii_id(id, p, buflen); return encode_ascii_id(xdr, id);
if (ret) if (ret)
return nfserrno(ret); return nfserrno(ret);
ret = strlen(item->name); ret = strlen(item->name);
WARN_ON_ONCE(ret > IDMAP_NAMESZ); WARN_ON_ONCE(ret > IDMAP_NAMESZ);
bytes = 4 + (XDR_QUADLEN(ret) << 2); p = xdr_reserve_space(xdr, ret + 4);
if (bytes > *buflen) if (!p)
return nfserr_resource; return nfserr_resource;
*p = xdr_encode_opaque(*p, item->name, ret); p = xdr_encode_opaque(p, item->name, ret);
*buflen -= bytes;
cache_put(&item->h, nn->idtoname_cache); cache_put(&item->h, nn->idtoname_cache);
return 0; return 0;
} }
@ -622,11 +621,12 @@ do_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, u
return idmap_name_to_id(rqstp, type, name, namelen, id); return idmap_name_to_id(rqstp, type, name, namelen, id);
} }
static __be32 encode_name_from_id(struct svc_rqst *rqstp, int type, u32 id, __be32 **p, int *buflen) static __be32 encode_name_from_id(struct xdr_stream *xdr,
struct svc_rqst *rqstp, int type, u32 id)
{ {
if (nfs4_disable_idmapping && rqstp->rq_cred.cr_flavor < RPC_AUTH_GSS) if (nfs4_disable_idmapping && rqstp->rq_cred.cr_flavor < RPC_AUTH_GSS)
return encode_ascii_id(id, p, buflen); return encode_ascii_id(xdr, id);
return idmap_id_to_name(rqstp, type, id, p, buflen); return idmap_id_to_name(xdr, rqstp, type, id);
} }
__be32 __be32
@ -655,14 +655,16 @@ nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen,
return status; return status;
} }
__be32 nfsd4_encode_user(struct svc_rqst *rqstp, kuid_t uid, __be32 **p, int *buflen) __be32 nfsd4_encode_user(struct xdr_stream *xdr, struct svc_rqst *rqstp,
kuid_t uid)
{ {
u32 id = from_kuid(&init_user_ns, uid); u32 id = from_kuid(&init_user_ns, uid);
return encode_name_from_id(rqstp, IDMAP_TYPE_USER, id, p, buflen); return encode_name_from_id(xdr, rqstp, IDMAP_TYPE_USER, id);
} }
__be32 nfsd4_encode_group(struct svc_rqst *rqstp, kgid_t gid, __be32 **p, int *buflen) __be32 nfsd4_encode_group(struct xdr_stream *xdr, struct svc_rqst *rqstp,
kgid_t gid)
{ {
u32 id = from_kgid(&init_user_ns, gid); u32 id = from_kgid(&init_user_ns, gid);
return encode_name_from_id(rqstp, IDMAP_TYPE_GROUP, id, p, buflen); return encode_name_from_id(xdr, rqstp, IDMAP_TYPE_GROUP, id);
} }

View File

@ -430,12 +430,12 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out; goto out;
break; break;
case NFS4_OPEN_CLAIM_PREVIOUS: case NFS4_OPEN_CLAIM_PREVIOUS:
open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
status = nfs4_check_open_reclaim(&open->op_clientid, status = nfs4_check_open_reclaim(&open->op_clientid,
cstate->minorversion, cstate->minorversion,
nn); nn);
if (status) if (status)
goto out; goto out;
open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
case NFS4_OPEN_CLAIM_FH: case NFS4_OPEN_CLAIM_FH:
case NFS4_OPEN_CLAIM_DELEG_CUR_FH: case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
status = do_open_fhandle(rqstp, cstate, open); status = do_open_fhandle(rqstp, cstate, open);
@ -445,7 +445,6 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
break; break;
case NFS4_OPEN_CLAIM_DELEG_PREV_FH: case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
case NFS4_OPEN_CLAIM_DELEGATE_PREV: case NFS4_OPEN_CLAIM_DELEGATE_PREV:
open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
dprintk("NFSD: unsupported OPEN claim type %d\n", dprintk("NFSD: unsupported OPEN claim type %d\n",
open->op_claim_type); open->op_claim_type);
status = nfserr_notsupp; status = nfserr_notsupp;
@ -786,7 +785,6 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (!nfsd4_last_compound_op(rqstp)) if (!nfsd4_last_compound_op(rqstp))
rqstp->rq_splice_ok = false; rqstp->rq_splice_ok = false;
nfs4_lock_state();
/* check stateid */ /* check stateid */
if ((status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), if ((status = nfs4_preprocess_stateid_op(SVC_NET(rqstp),
cstate, &read->rd_stateid, cstate, &read->rd_stateid,
@ -794,11 +792,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
dprintk("NFSD: nfsd4_read: couldn't process stateid!\n"); dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
goto out; goto out;
} }
if (read->rd_filp)
get_file(read->rd_filp);
status = nfs_ok; status = nfs_ok;
out: out:
nfs4_unlock_state();
read->rd_rqstp = rqstp; read->rd_rqstp = rqstp;
read->rd_fhp = &cstate->current_fh; read->rd_fhp = &cstate->current_fh;
return status; return status;
@ -937,10 +932,8 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
int err; int err;
if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
nfs4_lock_state();
status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate, status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate,
&setattr->sa_stateid, WR_STATE, NULL); &setattr->sa_stateid, WR_STATE, NULL);
nfs4_unlock_state();
if (status) { if (status) {
dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n"); dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
return status; return status;
@ -1006,17 +999,12 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (write->wr_offset >= OFFSET_MAX) if (write->wr_offset >= OFFSET_MAX)
return nfserr_inval; return nfserr_inval;
nfs4_lock_state();
status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), status = nfs4_preprocess_stateid_op(SVC_NET(rqstp),
cstate, stateid, WR_STATE, &filp); cstate, stateid, WR_STATE, &filp);
if (status) { if (status) {
nfs4_unlock_state();
dprintk("NFSD: nfsd4_write: couldn't process stateid!\n"); dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
return status; return status;
} }
if (filp)
get_file(filp);
nfs4_unlock_state();
cnt = write->wr_buflen; cnt = write->wr_buflen;
write->wr_how_written = write->wr_stable_how; write->wr_how_written = write->wr_stable_how;
@ -1072,10 +1060,10 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return nfserr_jukebox; return nfserr_jukebox;
p = buf; p = buf;
status = nfsd4_encode_fattr(&cstate->current_fh, status = nfsd4_encode_fattr_to_buf(&p, count, &cstate->current_fh,
cstate->current_fh.fh_export, cstate->current_fh.fh_export,
cstate->current_fh.fh_dentry, &p, cstate->current_fh.fh_dentry,
count, verify->ve_bmval, verify->ve_bmval,
rqstp, 0); rqstp, 0);
/* /*
* If nfsd4_encode_fattr() ran out of space, assume that's because * If nfsd4_encode_fattr() ran out of space, assume that's because
@ -1182,9 +1170,7 @@ struct nfsd4_operation {
static struct nfsd4_operation nfsd4_ops[]; static struct nfsd4_operation nfsd4_ops[];
#ifdef NFSD_DEBUG
static const char *nfsd4_op_name(unsigned opnum); static const char *nfsd4_op_name(unsigned opnum);
#endif
/* /*
* Enforce NFSv4.1 COMPOUND ordering rules: * Enforce NFSv4.1 COMPOUND ordering rules:
@ -1226,6 +1212,8 @@ static inline struct nfsd4_operation *OPDESC(struct nfsd4_op *op)
bool nfsd4_cache_this_op(struct nfsd4_op *op) bool nfsd4_cache_this_op(struct nfsd4_op *op)
{ {
if (op->opnum == OP_ILLEGAL)
return false;
return OPDESC(op)->op_flags & OP_CACHEME; return OPDESC(op)->op_flags & OP_CACHEME;
} }
@ -1262,6 +1250,25 @@ static bool need_wrongsec_check(struct svc_rqst *rqstp)
return !(nextd->op_flags & OP_HANDLES_WRONGSEC); return !(nextd->op_flags & OP_HANDLES_WRONGSEC);
} }
static void svcxdr_init_encode(struct svc_rqst *rqstp,
struct nfsd4_compoundres *resp)
{
struct xdr_stream *xdr = &resp->xdr;
struct xdr_buf *buf = &rqstp->rq_res;
struct kvec *head = buf->head;
xdr->buf = buf;
xdr->iov = head;
xdr->p = head->iov_base + head->iov_len;
xdr->end = head->iov_base + PAGE_SIZE - rqstp->rq_auth_slack;
/* Tail and page_len should be zero at this point: */
buf->len = buf->head[0].iov_len;
xdr->scratch.iov_len = 0;
xdr->page_ptr = buf->pages - 1;
buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages)
- rqstp->rq_auth_slack;
}
/* /*
* COMPOUND call. * COMPOUND call.
*/ */
@ -1275,24 +1282,16 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate = &resp->cstate; struct nfsd4_compound_state *cstate = &resp->cstate;
struct svc_fh *current_fh = &cstate->current_fh; struct svc_fh *current_fh = &cstate->current_fh;
struct svc_fh *save_fh = &cstate->save_fh; struct svc_fh *save_fh = &cstate->save_fh;
int slack_bytes;
u32 plen = 0;
__be32 status; __be32 status;
resp->xbuf = &rqstp->rq_res; svcxdr_init_encode(rqstp, resp);
resp->p = rqstp->rq_res.head[0].iov_base + resp->tagp = resp->xdr.p;
rqstp->rq_res.head[0].iov_len;
resp->tagp = resp->p;
/* reserve space for: taglen, tag, and opcnt */ /* reserve space for: taglen, tag, and opcnt */
resp->p += 2 + XDR_QUADLEN(args->taglen); xdr_reserve_space(&resp->xdr, 8 + args->taglen);
resp->end = rqstp->rq_res.head[0].iov_base + PAGE_SIZE;
resp->taglen = args->taglen; resp->taglen = args->taglen;
resp->tag = args->tag; resp->tag = args->tag;
resp->opcnt = 0;
resp->rqstp = rqstp; resp->rqstp = rqstp;
cstate->minorversion = args->minorversion; cstate->minorversion = args->minorversion;
cstate->replay_owner = NULL;
cstate->session = NULL;
fh_init(current_fh, NFS4_FHSIZE); fh_init(current_fh, NFS4_FHSIZE);
fh_init(save_fh, NFS4_FHSIZE); fh_init(save_fh, NFS4_FHSIZE);
/* /*
@ -1332,19 +1331,6 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
goto encode_op; goto encode_op;
} }
/* We must be able to encode a successful response to
* this operation, with enough room left over to encode a
* failed response to the next operation. If we don't
* have enough room, fail with ERR_RESOURCE.
*/
slack_bytes = (char *)resp->end - (char *)resp->p;
if (slack_bytes < COMPOUND_SLACK_SPACE
+ COMPOUND_ERR_SLACK_SPACE) {
BUG_ON(slack_bytes < COMPOUND_ERR_SLACK_SPACE);
op->status = nfserr_resource;
goto encode_op;
}
opdesc = OPDESC(op); opdesc = OPDESC(op);
if (!current_fh->fh_dentry) { if (!current_fh->fh_dentry) {
@ -1362,9 +1348,13 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
/* If op is non-idempotent */ /* If op is non-idempotent */
if (opdesc->op_flags & OP_MODIFIES_SOMETHING) { if (opdesc->op_flags & OP_MODIFIES_SOMETHING) {
plen = opdesc->op_rsize_bop(rqstp, op);
/* /*
* If there's still another operation, make sure * Don't execute this op if we couldn't encode a
* succesful reply:
*/
u32 plen = opdesc->op_rsize_bop(rqstp, op);
/*
* Plus if there's another operation, make sure
* we'll have space to at least encode an error: * we'll have space to at least encode an error:
*/ */
if (resp->opcnt < args->opcnt) if (resp->opcnt < args->opcnt)
@ -1399,7 +1389,7 @@ encode_op:
} }
if (op->status == nfserr_replay_me) { if (op->status == nfserr_replay_me) {
op->replay = &cstate->replay_owner->so_replay; op->replay = &cstate->replay_owner->so_replay;
nfsd4_encode_replay(resp, op); nfsd4_encode_replay(&resp->xdr, op);
status = op->status = op->replay->rp_status; status = op->status = op->replay->rp_status;
} else { } else {
nfsd4_encode_operation(resp, op); nfsd4_encode_operation(resp, op);
@ -1438,7 +1428,8 @@ out:
#define op_encode_change_info_maxsz (5) #define op_encode_change_info_maxsz (5)
#define nfs4_fattr_bitmap_maxsz (4) #define nfs4_fattr_bitmap_maxsz (4)
#define op_encode_lockowner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) /* We'll fall back on returning no lockowner if run out of space: */
#define op_encode_lockowner_maxsz (0)
#define op_encode_lock_denied_maxsz (8 + op_encode_lockowner_maxsz) #define op_encode_lock_denied_maxsz (8 + op_encode_lockowner_maxsz)
#define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) #define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
@ -1470,6 +1461,49 @@ static inline u32 nfsd4_create_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op
+ nfs4_fattr_bitmap_maxsz) * sizeof(__be32); + nfs4_fattr_bitmap_maxsz) * sizeof(__be32);
} }
/*
* Note since this is an idempotent operation we won't insist on failing
* the op prematurely if the estimate is too large. We may turn off splice
* reads unnecessarily.
*/
static inline u32 nfsd4_getattr_rsize(struct svc_rqst *rqstp,
struct nfsd4_op *op)
{
u32 *bmap = op->u.getattr.ga_bmval;
u32 bmap0 = bmap[0], bmap1 = bmap[1], bmap2 = bmap[2];
u32 ret = 0;
if (bmap0 & FATTR4_WORD0_ACL)
return svc_max_payload(rqstp);
if (bmap0 & FATTR4_WORD0_FS_LOCATIONS)
return svc_max_payload(rqstp);
if (bmap1 & FATTR4_WORD1_OWNER) {
ret += IDMAP_NAMESZ + 4;
bmap1 &= ~FATTR4_WORD1_OWNER;
}
if (bmap1 & FATTR4_WORD1_OWNER_GROUP) {
ret += IDMAP_NAMESZ + 4;
bmap1 &= ~FATTR4_WORD1_OWNER_GROUP;
}
if (bmap0 & FATTR4_WORD0_FILEHANDLE) {
ret += NFS4_FHSIZE + 4;
bmap0 &= ~FATTR4_WORD0_FILEHANDLE;
}
if (bmap2 & FATTR4_WORD2_SECURITY_LABEL) {
ret += NFSD4_MAX_SEC_LABEL_LEN + 12;
bmap2 &= ~FATTR4_WORD2_SECURITY_LABEL;
}
/*
* Largest of remaining attributes are 16 bytes (e.g.,
* supported_attributes)
*/
ret += 16 * (hweight32(bmap0) + hweight32(bmap1) + hweight32(bmap2));
/* bitmask, length */
ret += 20;
return ret;
}
static inline u32 nfsd4_link_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) static inline u32 nfsd4_link_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{ {
return (op_encode_hdr_size + op_encode_change_info_maxsz) return (op_encode_hdr_size + op_encode_change_info_maxsz)
@ -1500,18 +1534,19 @@ static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
if (rlen > maxcount) if (rlen > maxcount)
rlen = maxcount; rlen = maxcount;
return (op_encode_hdr_size + 2) * sizeof(__be32) + rlen; return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32);
} }
static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{ {
u32 maxcount = svc_max_payload(rqstp);
u32 rlen = op->u.readdir.rd_maxcount; u32 rlen = op->u.readdir.rd_maxcount;
if (rlen > PAGE_SIZE) if (rlen > maxcount)
rlen = PAGE_SIZE; rlen = maxcount;
return (op_encode_hdr_size + op_encode_verifier_maxsz) return (op_encode_hdr_size + op_encode_verifier_maxsz +
* sizeof(__be32) + rlen; XDR_QUADLEN(rlen)) * sizeof(__be32);
} }
static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
@ -1526,6 +1561,12 @@ static inline u32 nfsd4_rename_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op
+ op_encode_change_info_maxsz) * sizeof(__be32); + op_encode_change_info_maxsz) * sizeof(__be32);
} }
static inline u32 nfsd4_sequence_rsize(struct svc_rqst *rqstp,
struct nfsd4_op *op)
{
return NFS4_MAX_SESSIONID_LEN + 20;
}
static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{ {
return (op_encode_hdr_size + nfs4_fattr_bitmap_maxsz) * sizeof(__be32); return (op_encode_hdr_size + nfs4_fattr_bitmap_maxsz) * sizeof(__be32);
@ -1539,7 +1580,7 @@ static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_o
static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{ {
return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32); return (op_encode_hdr_size + 2 + op_encode_verifier_maxsz) * sizeof(__be32);
} }
static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
@ -1607,6 +1648,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
[OP_GETATTR] = { [OP_GETATTR] = {
.op_func = (nfsd4op_func)nfsd4_getattr, .op_func = (nfsd4op_func)nfsd4_getattr,
.op_flags = ALLOWED_ON_ABSENT_FS, .op_flags = ALLOWED_ON_ABSENT_FS,
.op_rsize_bop = nfsd4_getattr_rsize,
.op_name = "OP_GETATTR", .op_name = "OP_GETATTR",
}, },
[OP_GETFH] = { [OP_GETFH] = {
@ -1676,37 +1718,32 @@ static struct nfsd4_operation nfsd4_ops[] = {
[OP_PUTFH] = { [OP_PUTFH] = {
.op_func = (nfsd4op_func)nfsd4_putfh, .op_func = (nfsd4op_func)nfsd4_putfh,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
| OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING | OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID,
| OP_CLEAR_STATEID,
.op_name = "OP_PUTFH", .op_name = "OP_PUTFH",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
}, },
[OP_PUTPUBFH] = { [OP_PUTPUBFH] = {
.op_func = (nfsd4op_func)nfsd4_putrootfh, .op_func = (nfsd4op_func)nfsd4_putrootfh,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
| OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING | OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID,
| OP_CLEAR_STATEID,
.op_name = "OP_PUTPUBFH", .op_name = "OP_PUTPUBFH",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
}, },
[OP_PUTROOTFH] = { [OP_PUTROOTFH] = {
.op_func = (nfsd4op_func)nfsd4_putrootfh, .op_func = (nfsd4op_func)nfsd4_putrootfh,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
| OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING | OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID,
| OP_CLEAR_STATEID,
.op_name = "OP_PUTROOTFH", .op_name = "OP_PUTROOTFH",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
}, },
[OP_READ] = { [OP_READ] = {
.op_func = (nfsd4op_func)nfsd4_read, .op_func = (nfsd4op_func)nfsd4_read,
.op_flags = OP_MODIFIES_SOMETHING,
.op_name = "OP_READ", .op_name = "OP_READ",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_read_rsize, .op_rsize_bop = (nfsd4op_rsize)nfsd4_read_rsize,
.op_get_currentstateid = (stateid_getter)nfsd4_get_readstateid, .op_get_currentstateid = (stateid_getter)nfsd4_get_readstateid,
}, },
[OP_READDIR] = { [OP_READDIR] = {
.op_func = (nfsd4op_func)nfsd4_readdir, .op_func = (nfsd4op_func)nfsd4_readdir,
.op_flags = OP_MODIFIES_SOMETHING,
.op_name = "OP_READDIR", .op_name = "OP_READDIR",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_readdir_rsize, .op_rsize_bop = (nfsd4op_rsize)nfsd4_readdir_rsize,
}, },
@ -1864,14 +1901,33 @@ static struct nfsd4_operation nfsd4_ops[] = {
}, },
}; };
#ifdef NFSD_DEBUG int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
struct nfsd4_operation *opdesc;
nfsd4op_rsize estimator;
if (op->opnum == OP_ILLEGAL)
return op_encode_hdr_size * sizeof(__be32);
opdesc = OPDESC(op);
estimator = opdesc->op_rsize_bop;
return estimator ? estimator(rqstp, op) : PAGE_SIZE;
}
void warn_on_nonidempotent_op(struct nfsd4_op *op)
{
if (OPDESC(op)->op_flags & OP_MODIFIES_SOMETHING) {
pr_err("unable to encode reply to nonidempotent op %d (%s)\n",
op->opnum, nfsd4_op_name(op->opnum));
WARN_ON_ONCE(1);
}
}
static const char *nfsd4_op_name(unsigned opnum) static const char *nfsd4_op_name(unsigned opnum)
{ {
if (opnum < ARRAY_SIZE(nfsd4_ops)) if (opnum < ARRAY_SIZE(nfsd4_ops))
return nfsd4_ops[opnum].op_name; return nfsd4_ops[opnum].op_name;
return "unknown_operation"; return "unknown_operation";
} }
#endif
#define nfsd4_voidres nfsd4_voidargs #define nfsd4_voidres nfsd4_voidargs
struct nfsd4_voidargs { int dummy; }; struct nfsd4_voidargs { int dummy; };

View File

@ -81,13 +81,13 @@ static DEFINE_MUTEX(client_mutex);
* effort to decrease the scope of the client_mutex, this spinlock may * effort to decrease the scope of the client_mutex, this spinlock may
* eventually cover more: * eventually cover more:
*/ */
static DEFINE_SPINLOCK(recall_lock); static DEFINE_SPINLOCK(state_lock);
static struct kmem_cache *openowner_slab = NULL; static struct kmem_cache *openowner_slab;
static struct kmem_cache *lockowner_slab = NULL; static struct kmem_cache *lockowner_slab;
static struct kmem_cache *file_slab = NULL; static struct kmem_cache *file_slab;
static struct kmem_cache *stateid_slab = NULL; static struct kmem_cache *stateid_slab;
static struct kmem_cache *deleg_slab = NULL; static struct kmem_cache *deleg_slab;
void void
nfs4_lock_state(void) nfs4_lock_state(void)
@ -235,9 +235,9 @@ static void nfsd4_free_file(struct nfs4_file *f)
static inline void static inline void
put_nfs4_file(struct nfs4_file *fi) put_nfs4_file(struct nfs4_file *fi)
{ {
if (atomic_dec_and_lock(&fi->fi_ref, &recall_lock)) { if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) {
hlist_del(&fi->fi_hash); hlist_del(&fi->fi_hash);
spin_unlock(&recall_lock); spin_unlock(&state_lock);
iput(fi->fi_inode); iput(fi->fi_inode);
nfsd4_free_file(fi); nfsd4_free_file(fi);
} }
@ -375,7 +375,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv
dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab)); dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab));
if (dp == NULL) if (dp == NULL)
return dp; return dp;
dp->dl_stid.sc_type = NFS4_DELEG_STID;
/* /*
* delegation seqid's are never incremented. The 4.1 special * delegation seqid's are never incremented. The 4.1 special
* meaning of seqid 0 isn't meaningful, really, but let's avoid * meaning of seqid 0 isn't meaningful, really, but let's avoid
@ -418,6 +417,8 @@ nfs4_put_delegation(struct nfs4_delegation *dp)
static void nfs4_put_deleg_lease(struct nfs4_file *fp) static void nfs4_put_deleg_lease(struct nfs4_file *fp)
{ {
if (!fp->fi_lease)
return;
if (atomic_dec_and_test(&fp->fi_delegees)) { if (atomic_dec_and_test(&fp->fi_delegees)) {
vfs_setlease(fp->fi_deleg_file, F_UNLCK, &fp->fi_lease); vfs_setlease(fp->fi_deleg_file, F_UNLCK, &fp->fi_lease);
fp->fi_lease = NULL; fp->fi_lease = NULL;
@ -431,19 +432,31 @@ static void unhash_stid(struct nfs4_stid *s)
s->sc_type = 0; s->sc_type = 0;
} }
static void
hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp)
{
lockdep_assert_held(&state_lock);
dp->dl_stid.sc_type = NFS4_DELEG_STID;
list_add(&dp->dl_perfile, &fp->fi_delegations);
list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations);
}
/* Called under the state lock. */ /* Called under the state lock. */
static void static void
unhash_delegation(struct nfs4_delegation *dp) unhash_delegation(struct nfs4_delegation *dp)
{ {
spin_lock(&state_lock);
list_del_init(&dp->dl_perclnt); list_del_init(&dp->dl_perclnt);
spin_lock(&recall_lock);
list_del_init(&dp->dl_perfile); list_del_init(&dp->dl_perfile);
list_del_init(&dp->dl_recall_lru); list_del_init(&dp->dl_recall_lru);
spin_unlock(&recall_lock); spin_unlock(&state_lock);
if (dp->dl_file) {
nfs4_put_deleg_lease(dp->dl_file); nfs4_put_deleg_lease(dp->dl_file);
put_nfs4_file(dp->dl_file); put_nfs4_file(dp->dl_file);
dp->dl_file = NULL; dp->dl_file = NULL;
} }
}
@ -645,6 +658,12 @@ static void unhash_lockowner(struct nfs4_lockowner *lo)
} }
} }
static void nfs4_free_lockowner(struct nfs4_lockowner *lo)
{
kfree(lo->lo_owner.so_owner.data);
kmem_cache_free(lockowner_slab, lo);
}
static void release_lockowner(struct nfs4_lockowner *lo) static void release_lockowner(struct nfs4_lockowner *lo)
{ {
unhash_lockowner(lo); unhash_lockowner(lo);
@ -699,6 +718,12 @@ static void release_last_closed_stateid(struct nfs4_openowner *oo)
} }
} }
static void nfs4_free_openowner(struct nfs4_openowner *oo)
{
kfree(oo->oo_owner.so_owner.data);
kmem_cache_free(openowner_slab, oo);
}
static void release_openowner(struct nfs4_openowner *oo) static void release_openowner(struct nfs4_openowner *oo)
{ {
unhash_openowner(oo); unhash_openowner(oo);
@ -1093,7 +1118,7 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
return clp; return clp;
} }
static inline void static void
free_client(struct nfs4_client *clp) free_client(struct nfs4_client *clp)
{ {
struct nfsd_net __maybe_unused *nn = net_generic(clp->net, nfsd_net_id); struct nfsd_net __maybe_unused *nn = net_generic(clp->net, nfsd_net_id);
@ -1136,13 +1161,13 @@ destroy_client(struct nfs4_client *clp)
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
INIT_LIST_HEAD(&reaplist); INIT_LIST_HEAD(&reaplist);
spin_lock(&recall_lock); spin_lock(&state_lock);
while (!list_empty(&clp->cl_delegations)) { while (!list_empty(&clp->cl_delegations)) {
dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt); dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
list_del_init(&dp->dl_perclnt); list_del_init(&dp->dl_perclnt);
list_move(&dp->dl_recall_lru, &reaplist); list_move(&dp->dl_recall_lru, &reaplist);
} }
spin_unlock(&recall_lock); spin_unlock(&state_lock);
while (!list_empty(&reaplist)) { while (!list_empty(&reaplist)) {
dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
destroy_delegation(dp); destroy_delegation(dp);
@ -1544,6 +1569,7 @@ out_err:
void void
nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
{ {
struct xdr_buf *buf = resp->xdr.buf;
struct nfsd4_slot *slot = resp->cstate.slot; struct nfsd4_slot *slot = resp->cstate.slot;
unsigned int base; unsigned int base;
@ -1557,11 +1583,9 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
slot->sl_datalen = 0; slot->sl_datalen = 0;
return; return;
} }
slot->sl_datalen = (char *)resp->p - (char *)resp->cstate.datap; base = resp->cstate.data_offset;
base = (char *)resp->cstate.datap - slot->sl_datalen = buf->len - base;
(char *)resp->xbuf->head[0].iov_base; if (read_bytes_from_xdr_buf(buf, base, slot->sl_data, slot->sl_datalen))
if (read_bytes_from_xdr_buf(resp->xbuf, base, slot->sl_data,
slot->sl_datalen))
WARN("%s: sessions DRC could not cache compound\n", __func__); WARN("%s: sessions DRC could not cache compound\n", __func__);
return; return;
} }
@ -1602,6 +1626,8 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
struct nfsd4_sequence *seq) struct nfsd4_sequence *seq)
{ {
struct nfsd4_slot *slot = resp->cstate.slot; struct nfsd4_slot *slot = resp->cstate.slot;
struct xdr_stream *xdr = &resp->xdr;
__be32 *p;
__be32 status; __be32 status;
dprintk("--> %s slot %p\n", __func__, slot); dprintk("--> %s slot %p\n", __func__, slot);
@ -1610,14 +1636,16 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
if (status) if (status)
return status; return status;
/* The sequence operation has been encoded, cstate->datap set. */ p = xdr_reserve_space(xdr, slot->sl_datalen);
memcpy(resp->cstate.datap, slot->sl_data, slot->sl_datalen); if (!p) {
WARN_ON_ONCE(1);
return nfserr_serverfault;
}
xdr_encode_opaque_fixed(p, slot->sl_data, slot->sl_datalen);
xdr_commit_encode(xdr);
resp->opcnt = slot->sl_opcnt; resp->opcnt = slot->sl_opcnt;
resp->p = resp->cstate.datap + XDR_QUADLEN(slot->sl_datalen); return slot->sl_status;
status = slot->sl_status;
return status;
} }
/* /*
@ -2189,11 +2217,13 @@ nfsd4_sequence(struct svc_rqst *rqstp,
struct nfsd4_sequence *seq) struct nfsd4_sequence *seq)
{ {
struct nfsd4_compoundres *resp = rqstp->rq_resp; struct nfsd4_compoundres *resp = rqstp->rq_resp;
struct xdr_stream *xdr = &resp->xdr;
struct nfsd4_session *session; struct nfsd4_session *session;
struct nfs4_client *clp; struct nfs4_client *clp;
struct nfsd4_slot *slot; struct nfsd4_slot *slot;
struct nfsd4_conn *conn; struct nfsd4_conn *conn;
__be32 status; __be32 status;
int buflen;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
if (resp->opcnt != 1) if (resp->opcnt != 1)
@ -2262,6 +2292,16 @@ nfsd4_sequence(struct svc_rqst *rqstp,
if (status) if (status)
goto out_put_session; goto out_put_session;
buflen = (seq->cachethis) ?
session->se_fchannel.maxresp_cached :
session->se_fchannel.maxresp_sz;
status = (seq->cachethis) ? nfserr_rep_too_big_to_cache :
nfserr_rep_too_big;
if (xdr_restrict_buflen(xdr, buflen - rqstp->rq_auth_slack))
goto out_put_session;
svc_reserve(rqstp, buflen);
status = nfs_ok;
/* Success! bump slot seqid */ /* Success! bump slot seqid */
slot->sl_seqid = seq->seqid; slot->sl_seqid = seq->seqid;
slot->sl_flags |= NFSD4_SLOT_INUSE; slot->sl_flags |= NFSD4_SLOT_INUSE;
@ -2499,28 +2539,19 @@ static void nfsd4_init_file(struct nfs4_file *fp, struct inode *ino)
fp->fi_lease = NULL; fp->fi_lease = NULL;
memset(fp->fi_fds, 0, sizeof(fp->fi_fds)); memset(fp->fi_fds, 0, sizeof(fp->fi_fds));
memset(fp->fi_access, 0, sizeof(fp->fi_access)); memset(fp->fi_access, 0, sizeof(fp->fi_access));
spin_lock(&recall_lock); spin_lock(&state_lock);
hlist_add_head(&fp->fi_hash, &file_hashtbl[hashval]); hlist_add_head(&fp->fi_hash, &file_hashtbl[hashval]);
spin_unlock(&recall_lock); spin_unlock(&state_lock);
}
static void
nfsd4_free_slab(struct kmem_cache **slab)
{
if (*slab == NULL)
return;
kmem_cache_destroy(*slab);
*slab = NULL;
} }
void void
nfsd4_free_slabs(void) nfsd4_free_slabs(void)
{ {
nfsd4_free_slab(&openowner_slab); kmem_cache_destroy(openowner_slab);
nfsd4_free_slab(&lockowner_slab); kmem_cache_destroy(lockowner_slab);
nfsd4_free_slab(&file_slab); kmem_cache_destroy(file_slab);
nfsd4_free_slab(&stateid_slab); kmem_cache_destroy(stateid_slab);
nfsd4_free_slab(&deleg_slab); kmem_cache_destroy(deleg_slab);
} }
int int
@ -2529,42 +2560,38 @@ nfsd4_init_slabs(void)
openowner_slab = kmem_cache_create("nfsd4_openowners", openowner_slab = kmem_cache_create("nfsd4_openowners",
sizeof(struct nfs4_openowner), 0, 0, NULL); sizeof(struct nfs4_openowner), 0, 0, NULL);
if (openowner_slab == NULL) if (openowner_slab == NULL)
goto out_nomem; goto out;
lockowner_slab = kmem_cache_create("nfsd4_lockowners", lockowner_slab = kmem_cache_create("nfsd4_lockowners",
sizeof(struct nfs4_lockowner), 0, 0, NULL); sizeof(struct nfs4_lockowner), 0, 0, NULL);
if (lockowner_slab == NULL) if (lockowner_slab == NULL)
goto out_nomem; goto out_free_openowner_slab;
file_slab = kmem_cache_create("nfsd4_files", file_slab = kmem_cache_create("nfsd4_files",
sizeof(struct nfs4_file), 0, 0, NULL); sizeof(struct nfs4_file), 0, 0, NULL);
if (file_slab == NULL) if (file_slab == NULL)
goto out_nomem; goto out_free_lockowner_slab;
stateid_slab = kmem_cache_create("nfsd4_stateids", stateid_slab = kmem_cache_create("nfsd4_stateids",
sizeof(struct nfs4_ol_stateid), 0, 0, NULL); sizeof(struct nfs4_ol_stateid), 0, 0, NULL);
if (stateid_slab == NULL) if (stateid_slab == NULL)
goto out_nomem; goto out_free_file_slab;
deleg_slab = kmem_cache_create("nfsd4_delegations", deleg_slab = kmem_cache_create("nfsd4_delegations",
sizeof(struct nfs4_delegation), 0, 0, NULL); sizeof(struct nfs4_delegation), 0, 0, NULL);
if (deleg_slab == NULL) if (deleg_slab == NULL)
goto out_nomem; goto out_free_stateid_slab;
return 0; return 0;
out_nomem:
nfsd4_free_slabs(); out_free_stateid_slab:
kmem_cache_destroy(stateid_slab);
out_free_file_slab:
kmem_cache_destroy(file_slab);
out_free_lockowner_slab:
kmem_cache_destroy(lockowner_slab);
out_free_openowner_slab:
kmem_cache_destroy(openowner_slab);
out:
dprintk("nfsd4: out of memory while initializing nfsv4\n"); dprintk("nfsd4: out of memory while initializing nfsv4\n");
return -ENOMEM; return -ENOMEM;
} }
void nfs4_free_openowner(struct nfs4_openowner *oo)
{
kfree(oo->oo_owner.so_owner.data);
kmem_cache_free(openowner_slab, oo);
}
void nfs4_free_lockowner(struct nfs4_lockowner *lo)
{
kfree(lo->lo_owner.so_owner.data);
kmem_cache_free(lockowner_slab, lo);
}
static void init_nfs4_replay(struct nfs4_replay *rp) static void init_nfs4_replay(struct nfs4_replay *rp)
{ {
rp->rp_status = nfserr_serverfault; rp->rp_status = nfserr_serverfault;
@ -2685,15 +2712,15 @@ find_file(struct inode *ino)
unsigned int hashval = file_hashval(ino); unsigned int hashval = file_hashval(ino);
struct nfs4_file *fp; struct nfs4_file *fp;
spin_lock(&recall_lock); spin_lock(&state_lock);
hlist_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) { hlist_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) {
if (fp->fi_inode == ino) { if (fp->fi_inode == ino) {
get_nfs4_file(fp); get_nfs4_file(fp);
spin_unlock(&recall_lock); spin_unlock(&state_lock);
return fp; return fp;
} }
} }
spin_unlock(&recall_lock); spin_unlock(&state_lock);
return NULL; return NULL;
} }
@ -2730,6 +2757,7 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
struct nfs4_client *clp = dp->dl_stid.sc_client; struct nfs4_client *clp = dp->dl_stid.sc_client;
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
lockdep_assert_held(&state_lock);
/* We're assuming the state code never drops its reference /* We're assuming the state code never drops its reference
* without first removing the lease. Since we're in this lease * without first removing the lease. Since we're in this lease
* callback (and since the lease code is serialized by the kernel * callback (and since the lease code is serialized by the kernel
@ -2766,11 +2794,11 @@ static void nfsd_break_deleg_cb(struct file_lock *fl)
*/ */
fl->fl_break_time = 0; fl->fl_break_time = 0;
spin_lock(&recall_lock); spin_lock(&state_lock);
fp->fi_had_conflict = true; fp->fi_had_conflict = true;
list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) list_for_each_entry(dp, &fp->fi_delegations, dl_perfile)
nfsd_break_one_deleg(dp); nfsd_break_one_deleg(dp);
spin_unlock(&recall_lock); spin_unlock(&state_lock);
} }
static static
@ -3047,11 +3075,12 @@ static int nfs4_setlease(struct nfs4_delegation *dp)
status = vfs_setlease(fl->fl_file, fl->fl_type, &fl); status = vfs_setlease(fl->fl_file, fl->fl_type, &fl);
if (status) if (status)
goto out_free; goto out_free;
list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations);
fp->fi_lease = fl; fp->fi_lease = fl;
fp->fi_deleg_file = get_file(fl->fl_file); fp->fi_deleg_file = get_file(fl->fl_file);
atomic_set(&fp->fi_delegees, 1); atomic_set(&fp->fi_delegees, 1);
list_add(&dp->dl_perfile, &fp->fi_delegations); spin_lock(&state_lock);
hash_delegation_locked(dp, fp);
spin_unlock(&state_lock);
return 0; return 0;
out_free: out_free:
locks_free_lock(fl); locks_free_lock(fl);
@ -3060,33 +3089,21 @@ out_free:
static int nfs4_set_delegation(struct nfs4_delegation *dp, struct nfs4_file *fp) static int nfs4_set_delegation(struct nfs4_delegation *dp, struct nfs4_file *fp)
{ {
int status;
if (fp->fi_had_conflict) if (fp->fi_had_conflict)
return -EAGAIN; return -EAGAIN;
get_nfs4_file(fp); get_nfs4_file(fp);
dp->dl_file = fp; dp->dl_file = fp;
if (!fp->fi_lease) { if (!fp->fi_lease)
status = nfs4_setlease(dp); return nfs4_setlease(dp);
if (status) spin_lock(&state_lock);
goto out_free;
return 0;
}
spin_lock(&recall_lock);
if (fp->fi_had_conflict) {
spin_unlock(&recall_lock);
status = -EAGAIN;
goto out_free;
}
atomic_inc(&fp->fi_delegees); atomic_inc(&fp->fi_delegees);
list_add(&dp->dl_perfile, &fp->fi_delegations); if (fp->fi_had_conflict) {
spin_unlock(&recall_lock); spin_unlock(&state_lock);
list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); return -EAGAIN;
}
hash_delegation_locked(dp, fp);
spin_unlock(&state_lock);
return 0; return 0;
out_free:
put_nfs4_file(fp);
dp->dl_file = fp;
return status;
} }
static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
@ -3173,8 +3190,7 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh,
open->op_delegate_type = NFS4_OPEN_DELEGATE_READ; open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;
return; return;
out_free: out_free:
remove_stid(&dp->dl_stid); destroy_delegation(dp);
nfs4_put_delegation(dp);
out_no_deleg: out_no_deleg:
open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE; open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE;
if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS && if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS &&
@ -3391,8 +3407,7 @@ nfs4_laundromat(struct nfsd_net *nn)
struct nfs4_delegation *dp; struct nfs4_delegation *dp;
struct list_head *pos, *next, reaplist; struct list_head *pos, *next, reaplist;
time_t cutoff = get_seconds() - nn->nfsd4_lease; time_t cutoff = get_seconds() - nn->nfsd4_lease;
time_t t, clientid_val = nn->nfsd4_lease; time_t t, new_timeo = nn->nfsd4_lease;
time_t u, test_val = nn->nfsd4_lease;
nfs4_lock_state(); nfs4_lock_state();
@ -3404,8 +3419,7 @@ nfs4_laundromat(struct nfsd_net *nn)
clp = list_entry(pos, struct nfs4_client, cl_lru); clp = list_entry(pos, struct nfs4_client, cl_lru);
if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) { if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
t = clp->cl_time - cutoff; t = clp->cl_time - cutoff;
if (clientid_val > t) new_timeo = min(new_timeo, t);
clientid_val = t;
break; break;
} }
if (mark_client_expired_locked(clp)) { if (mark_client_expired_locked(clp)) {
@ -3422,39 +3436,35 @@ nfs4_laundromat(struct nfsd_net *nn)
clp->cl_clientid.cl_id); clp->cl_clientid.cl_id);
expire_client(clp); expire_client(clp);
} }
spin_lock(&recall_lock); spin_lock(&state_lock);
list_for_each_safe(pos, next, &nn->del_recall_lru) { list_for_each_safe(pos, next, &nn->del_recall_lru) {
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
if (net_generic(dp->dl_stid.sc_client->net, nfsd_net_id) != nn) if (net_generic(dp->dl_stid.sc_client->net, nfsd_net_id) != nn)
continue; continue;
if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) { if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) {
u = dp->dl_time - cutoff; t = dp->dl_time - cutoff;
if (test_val > u) new_timeo = min(new_timeo, t);
test_val = u;
break; break;
} }
list_move(&dp->dl_recall_lru, &reaplist); list_move(&dp->dl_recall_lru, &reaplist);
} }
spin_unlock(&recall_lock); spin_unlock(&state_lock);
list_for_each_safe(pos, next, &reaplist) { list_for_each_safe(pos, next, &reaplist) {
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
revoke_delegation(dp); revoke_delegation(dp);
} }
test_val = nn->nfsd4_lease;
list_for_each_safe(pos, next, &nn->close_lru) { list_for_each_safe(pos, next, &nn->close_lru) {
oo = container_of(pos, struct nfs4_openowner, oo_close_lru); oo = container_of(pos, struct nfs4_openowner, oo_close_lru);
if (time_after((unsigned long)oo->oo_time, (unsigned long)cutoff)) { if (time_after((unsigned long)oo->oo_time, (unsigned long)cutoff)) {
u = oo->oo_time - cutoff; t = oo->oo_time - cutoff;
if (test_val > u) new_timeo = min(new_timeo, t);
test_val = u;
break; break;
} }
release_openowner(oo); release_openowner(oo);
} }
if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT) new_timeo = max_t(time_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT;
nfs4_unlock_state(); nfs4_unlock_state();
return clientid_val; return new_timeo;
} }
static struct workqueue_struct *laundry_wq; static struct workqueue_struct *laundry_wq;
@ -3654,6 +3664,7 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
struct svc_fh *current_fh = &cstate->current_fh; struct svc_fh *current_fh = &cstate->current_fh;
struct inode *ino = current_fh->fh_dentry->d_inode; struct inode *ino = current_fh->fh_dentry->d_inode;
struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct nfsd_net *nn = net_generic(net, nfsd_net_id);
struct file *file = NULL;
__be32 status; __be32 status;
if (filpp) if (filpp)
@ -3665,10 +3676,12 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
return check_special_stateids(net, current_fh, stateid, flags); return check_special_stateids(net, current_fh, stateid, flags);
nfs4_lock_state();
status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID,
&s, cstate->minorversion, nn); &s, cstate->minorversion, nn);
if (status) if (status)
return status; goto out;
status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate)); status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate));
if (status) if (status)
goto out; goto out;
@ -3679,8 +3692,8 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
if (status) if (status)
goto out; goto out;
if (filpp) { if (filpp) {
*filpp = dp->dl_file->fi_deleg_file; file = dp->dl_file->fi_deleg_file;
if (!*filpp) { if (!file) {
WARN_ON_ONCE(1); WARN_ON_ONCE(1);
status = nfserr_serverfault; status = nfserr_serverfault;
goto out; goto out;
@ -3701,16 +3714,20 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
goto out; goto out;
if (filpp) { if (filpp) {
if (flags & RD_STATE) if (flags & RD_STATE)
*filpp = find_readable_file(stp->st_file); file = find_readable_file(stp->st_file);
else else
*filpp = find_writeable_file(stp->st_file); file = find_writeable_file(stp->st_file);
} }
break; break;
default: default:
return nfserr_bad_stateid; status = nfserr_bad_stateid;
goto out;
} }
status = nfs_ok; status = nfs_ok;
if (file)
*filpp = get_file(file);
out: out:
nfs4_unlock_state();
return status; return status;
} }
@ -3726,7 +3743,7 @@ nfsd4_free_lock_stateid(struct nfs4_ol_stateid *stp)
* correspondence, and we have to delete the lockowner when we * correspondence, and we have to delete the lockowner when we
* delete the lock stateid: * delete the lock stateid:
*/ */
unhash_lockowner(lo); release_lockowner(lo);
return nfs_ok; return nfs_ok;
} }
@ -4896,6 +4913,7 @@ static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max,
struct nfs4_delegation *dp, *next; struct nfs4_delegation *dp, *next;
u64 count = 0; u64 count = 0;
lockdep_assert_held(&state_lock);
list_for_each_entry_safe(dp, next, &clp->cl_delegations, dl_perclnt) { list_for_each_entry_safe(dp, next, &clp->cl_delegations, dl_perclnt) {
if (victims) if (victims)
list_move(&dp->dl_recall_lru, victims); list_move(&dp->dl_recall_lru, victims);
@ -4911,9 +4929,9 @@ u64 nfsd_forget_client_delegations(struct nfs4_client *clp, u64 max)
LIST_HEAD(victims); LIST_HEAD(victims);
u64 count; u64 count;
spin_lock(&recall_lock); spin_lock(&state_lock);
count = nfsd_find_all_delegations(clp, max, &victims); count = nfsd_find_all_delegations(clp, max, &victims);
spin_unlock(&recall_lock); spin_unlock(&state_lock);
list_for_each_entry_safe(dp, next, &victims, dl_recall_lru) list_for_each_entry_safe(dp, next, &victims, dl_recall_lru)
revoke_delegation(dp); revoke_delegation(dp);
@ -4927,11 +4945,11 @@ u64 nfsd_recall_client_delegations(struct nfs4_client *clp, u64 max)
LIST_HEAD(victims); LIST_HEAD(victims);
u64 count; u64 count;
spin_lock(&recall_lock); spin_lock(&state_lock);
count = nfsd_find_all_delegations(clp, max, &victims); count = nfsd_find_all_delegations(clp, max, &victims);
list_for_each_entry_safe(dp, next, &victims, dl_recall_lru) list_for_each_entry_safe(dp, next, &victims, dl_recall_lru)
nfsd_break_one_deleg(dp); nfsd_break_one_deleg(dp);
spin_unlock(&recall_lock); spin_unlock(&state_lock);
return count; return count;
} }
@ -4940,9 +4958,9 @@ u64 nfsd_print_client_delegations(struct nfs4_client *clp, u64 max)
{ {
u64 count = 0; u64 count = 0;
spin_lock(&recall_lock); spin_lock(&state_lock);
count = nfsd_find_all_delegations(clp, max, NULL); count = nfsd_find_all_delegations(clp, max, NULL);
spin_unlock(&recall_lock); spin_unlock(&state_lock);
nfsd_print_count(clp, count, "delegations"); nfsd_print_count(clp, count, "delegations");
return count; return count;
@ -4983,13 +5001,6 @@ struct nfs4_client *nfsd_find_client(struct sockaddr_storage *addr, size_t addr_
#endif /* CONFIG_NFSD_FAULT_INJECTION */ #endif /* CONFIG_NFSD_FAULT_INJECTION */
/* initialization to perform at module load time: */
void
nfs4_state_init(void)
{
}
/* /*
* Since the lifetime of a delegation isn't limited to that of an open, a * Since the lifetime of a delegation isn't limited to that of an open, a
* client may quite reasonably hang on to a delegation as long as it has * client may quite reasonably hang on to a delegation as long as it has
@ -5160,12 +5171,12 @@ nfs4_state_shutdown_net(struct net *net)
nfs4_lock_state(); nfs4_lock_state();
INIT_LIST_HEAD(&reaplist); INIT_LIST_HEAD(&reaplist);
spin_lock(&recall_lock); spin_lock(&state_lock);
list_for_each_safe(pos, next, &nn->del_recall_lru) { list_for_each_safe(pos, next, &nn->del_recall_lru) {
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
list_move(&dp->dl_recall_lru, &reaplist); list_move(&dp->dl_recall_lru, &reaplist);
} }
spin_unlock(&recall_lock); spin_unlock(&state_lock);
list_for_each_safe(pos, next, &reaplist) { list_for_each_safe(pos, next, &reaplist) {
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
destroy_delegation(dp); destroy_delegation(dp);

File diff suppressed because it is too large Load Diff

View File

@ -224,13 +224,6 @@ hash_refile(struct svc_cacherep *rp)
hlist_add_head(&rp->c_hash, cache_hash + hash_32(rp->c_xid, maskbits)); hlist_add_head(&rp->c_hash, cache_hash + hash_32(rp->c_xid, maskbits));
} }
static inline bool
nfsd_cache_entry_expired(struct svc_cacherep *rp)
{
return rp->c_state != RC_INPROG &&
time_after(jiffies, rp->c_timestamp + RC_EXPIRE);
}
/* /*
* Walk the LRU list and prune off entries that are older than RC_EXPIRE. * Walk the LRU list and prune off entries that are older than RC_EXPIRE.
* Also prune the oldest ones when the total exceeds the max number of entries. * Also prune the oldest ones when the total exceeds the max number of entries.
@ -242,8 +235,14 @@ prune_cache_entries(void)
long freed = 0; long freed = 0;
list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) { list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) {
if (!nfsd_cache_entry_expired(rp) && /*
num_drc_entries <= max_drc_entries) * Don't free entries attached to calls that are still
* in-progress, but do keep scanning the list.
*/
if (rp->c_state == RC_INPROG)
continue;
if (num_drc_entries <= max_drc_entries &&
time_before(jiffies, rp->c_timestamp + RC_EXPIRE))
break; break;
nfsd_reply_cache_free_locked(rp); nfsd_reply_cache_free_locked(rp);
freed++; freed++;

View File

@ -1179,7 +1179,6 @@ static int __init init_nfsd(void)
retval = nfsd4_init_slabs(); retval = nfsd4_init_slabs();
if (retval) if (retval)
goto out_unregister_pernet; goto out_unregister_pernet;
nfs4_state_init();
retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */ retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */
if (retval) if (retval)
goto out_free_slabs; goto out_free_slabs;

View File

@ -15,11 +15,20 @@
#include <linux/nfs2.h> #include <linux/nfs2.h>
#include <linux/nfs3.h> #include <linux/nfs3.h>
#include <linux/nfs4.h> #include <linux/nfs4.h>
#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/msg_prot.h> #include <linux/sunrpc/msg_prot.h>
#include <linux/nfsd/debug.h> #include <uapi/linux/nfsd/debug.h>
#include <linux/nfsd/export.h>
#include <linux/nfsd/stats.h> #include "stats.h"
#include "export.h"
#undef ifdebug
#ifdef NFSD_DEBUG
# define ifdebug(flag) if (nfsd_debug & NFSDDBG_##flag)
#else
# define ifdebug(flag) if (0)
#endif
/* /*
* nfsd version * nfsd version
@ -106,7 +115,6 @@ static inline int nfsd_v4client(struct svc_rqst *rq)
*/ */
#ifdef CONFIG_NFSD_V4 #ifdef CONFIG_NFSD_V4
extern unsigned long max_delegations; extern unsigned long max_delegations;
void nfs4_state_init(void);
int nfsd4_init_slabs(void); int nfsd4_init_slabs(void);
void nfsd4_free_slabs(void); void nfsd4_free_slabs(void);
int nfs4_state_start(void); int nfs4_state_start(void);
@ -117,7 +125,6 @@ void nfs4_reset_lease(time_t leasetime);
int nfs4_reset_recoverydir(char *recdir); int nfs4_reset_recoverydir(char *recdir);
char * nfs4_recoverydir(void); char * nfs4_recoverydir(void);
#else #else
static inline void nfs4_state_init(void) { }
static inline int nfsd4_init_slabs(void) { return 0; } static inline int nfsd4_init_slabs(void) { return 0; }
static inline void nfsd4_free_slabs(void) { } static inline void nfsd4_free_slabs(void) { }
static inline int nfs4_state_start(void) { return 0; } static inline int nfs4_state_start(void) { return 0; }

View File

@ -88,8 +88,7 @@ static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp,
/* Check if the request originated from a secure port. */ /* Check if the request originated from a secure port. */
if (!rqstp->rq_secure && !(flags & NFSEXP_INSECURE_PORT)) { if (!rqstp->rq_secure && !(flags & NFSEXP_INSECURE_PORT)) {
RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
dprintk(KERN_WARNING dprintk("nfsd: request from insecure port %s!\n",
"nfsd: request from insecure port %s!\n",
svc_print_addr(rqstp, buf, sizeof(buf))); svc_print_addr(rqstp, buf, sizeof(buf)));
return nfserr_perm; return nfserr_perm;
} }
@ -169,8 +168,8 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
data_left -= len; data_left -= len;
if (data_left < 0) if (data_left < 0)
return error; return error;
exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_auth); exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_fsid);
fid = (struct fid *)(fh->fh_auth + len); fid = (struct fid *)(fh->fh_fsid + len);
} else { } else {
__u32 tfh[2]; __u32 tfh[2];
dev_t xdev; dev_t xdev;
@ -385,7 +384,7 @@ static void _fh_update(struct svc_fh *fhp, struct svc_export *exp,
{ {
if (dentry != exp->ex_path.dentry) { if (dentry != exp->ex_path.dentry) {
struct fid *fid = (struct fid *) struct fid *fid = (struct fid *)
(fhp->fh_handle.fh_auth + fhp->fh_handle.fh_size/4 - 1); (fhp->fh_handle.fh_fsid + fhp->fh_handle.fh_size/4 - 1);
int maxsize = (fhp->fh_maxsize - fhp->fh_handle.fh_size)/4; int maxsize = (fhp->fh_maxsize - fhp->fh_handle.fh_size)/4;
int subtreecheck = !(exp->ex_flags & NFSEXP_NOSUBTREECHECK); int subtreecheck = !(exp->ex_flags & NFSEXP_NOSUBTREECHECK);
@ -513,7 +512,6 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
*/ */
struct inode * inode = dentry->d_inode; struct inode * inode = dentry->d_inode;
__u32 *datap;
dev_t ex_dev = exp_sb(exp)->s_dev; dev_t ex_dev = exp_sb(exp)->s_dev;
dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %pd2, ino=%ld)\n", dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %pd2, ino=%ld)\n",
@ -557,17 +555,16 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
if (inode) if (inode)
_fh_update_old(dentry, exp, &fhp->fh_handle); _fh_update_old(dentry, exp, &fhp->fh_handle);
} else { } else {
int len; fhp->fh_handle.fh_size =
key_len(fhp->fh_handle.fh_fsid_type) + 4;
fhp->fh_handle.fh_auth_type = 0; fhp->fh_handle.fh_auth_type = 0;
datap = fhp->fh_handle.fh_auth+0;
mk_fsid(fhp->fh_handle.fh_fsid_type, datap, ex_dev, mk_fsid(fhp->fh_handle.fh_fsid_type,
fhp->fh_handle.fh_fsid,
ex_dev,
exp->ex_path.dentry->d_inode->i_ino, exp->ex_path.dentry->d_inode->i_ino,
exp->ex_fsid, exp->ex_uuid); exp->ex_fsid, exp->ex_uuid);
len = key_len(fhp->fh_handle.fh_fsid_type);
datap += len/4;
fhp->fh_handle.fh_size = 4 + len;
if (inode) if (inode)
_fh_update(fhp, exp, dentry); _fh_update(fhp, exp, dentry);
if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) { if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) {

View File

@ -1,9 +1,58 @@
/* Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> */ /*
* Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
*
* This file describes the layout of the file handles as passed
* over the wire.
*/
#ifndef _LINUX_NFSD_NFSFH_H
#define _LINUX_NFSD_NFSFH_H
#ifndef _LINUX_NFSD_FH_INT_H #include <linux/sunrpc/svc.h>
#define _LINUX_NFSD_FH_INT_H #include <uapi/linux/nfsd/nfsfh.h>
#include <linux/nfsd/nfsfh.h> static inline __u32 ino_t_to_u32(ino_t ino)
{
return (__u32) ino;
}
/* Convert a 32-bit inode number to the host ino_t with a plain cast. */
static inline ino_t u32_to_ino_t(__u32 uino)
{
	const ino_t ino = (ino_t) uino;

	return ino;
}
/*
 * This is the internal representation of an NFS handle used in knfsd.
 *
 * The fh_pre_* and fh_post_* members are only present when
 * CONFIG_NFSD_V3 is set; they are there to support wcc_attr's in NFSv3.
 */
typedef struct svc_fh {
struct knfsd_fh fh_handle; /* FH data */
struct dentry * fh_dentry; /* validated dentry */
struct svc_export * fh_export; /* export pointer */
int fh_maxsize; /* max size for fh_handle */
unsigned char fh_locked; /* inode locked by us */
unsigned char fh_want_write; /* remount protection taken */
#ifdef CONFIG_NFSD_V3
unsigned char fh_post_saved; /* post-op attrs saved */
unsigned char fh_pre_saved; /* pre-op attrs saved */
/* Pre-op attributes saved during fh_lock */
__u64 fh_pre_size; /* size before operation */
struct timespec fh_pre_mtime; /* mtime before operation */
struct timespec fh_pre_ctime; /* ctime before operation */
/*
 * Pre-op nfsv4 change attribute.  Callers must check
 * IS_I_VERSION(inode) to find out whether it is valid.
 */
u64 fh_pre_change;
/* Post-op attributes saved in fh_unlock */
struct kstat fh_post_attr; /* full attrs after operation */
u64 fh_post_change; /* nfsv4 change; same IS_I_VERSION caveat as fh_pre_change */
#endif /* CONFIG_NFSD_V3 */
} svc_fh;
enum nfsd_fsid { enum nfsd_fsid {
FSID_DEV = 0, FSID_DEV = 0,
@ -215,4 +264,4 @@ fh_unlock(struct svc_fh *fhp)
} }
} }
#endif /* _LINUX_NFSD_FH_INT_H */ #endif /* _LINUX_NFSD_NFSFH_H */

View File

@ -591,12 +591,6 @@ nfsd(void *vrqstp)
nfsdstats.th_cnt++; nfsdstats.th_cnt++;
mutex_unlock(&nfsd_mutex); mutex_unlock(&nfsd_mutex);
/*
* We want less throttling in balance_dirty_pages() so that nfs to
* localhost doesn't cause nfsd to lock up due to all the client's
* dirty pages.
*/
current->flags |= PF_LESS_THROTTLE;
set_freezable(); set_freezable();
/* /*

View File

@ -214,7 +214,8 @@ nfssvc_decode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
int int
nfssvc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *args) nfssvc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *args)
{ {
if (!(p = decode_fh(p, &args->fh))) p = decode_fh(p, &args->fh);
if (!p)
return 0; return 0;
return xdr_argsize_check(rqstp, p); return xdr_argsize_check(rqstp, p);
} }
@ -248,7 +249,8 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
{ {
unsigned int len; unsigned int len;
int v; int v;
if (!(p = decode_fh(p, &args->fh))) p = decode_fh(p, &args->fh);
if (!p)
return 0; return 0;
args->offset = ntohl(*p++); args->offset = ntohl(*p++);
@ -281,7 +283,8 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
unsigned int len, hdr, dlen; unsigned int len, hdr, dlen;
int v; int v;
if (!(p = decode_fh(p, &args->fh))) p = decode_fh(p, &args->fh);
if (!p)
return 0; return 0;
p++; /* beginoffset */ p++; /* beginoffset */
@ -355,7 +358,8 @@ nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p,
int int
nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readlinkargs *args) nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readlinkargs *args)
{ {
if (!(p = decode_fh(p, &args->fh))) p = decode_fh(p, &args->fh);
if (!p)
return 0; return 0;
args->buffer = page_address(*(rqstp->rq_next_page++)); args->buffer = page_address(*(rqstp->rq_next_page++));
@ -391,7 +395,8 @@ int
nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p, nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd_readdirargs *args) struct nfsd_readdirargs *args)
{ {
if (!(p = decode_fh(p, &args->fh))) p = decode_fh(p, &args->fh);
if (!p)
return 0; return 0;
args->cookie = ntohl(*p++); args->cookie = ntohl(*p++);
args->count = ntohl(*p++); args->count = ntohl(*p++);

View File

@ -37,7 +37,6 @@
#include <linux/idr.h> #include <linux/idr.h>
#include <linux/sunrpc/svc_xprt.h> #include <linux/sunrpc/svc_xprt.h>
#include <linux/nfsd/nfsfh.h>
#include "nfsfh.h" #include "nfsfh.h"
typedef struct { typedef struct {
@ -123,7 +122,7 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s)
/* Maximum number of operations per session compound */ /* Maximum number of operations per session compound */
#define NFSD_MAX_OPS_PER_COMPOUND 16 #define NFSD_MAX_OPS_PER_COMPOUND 16
/* Maximum session per slot cache size */ /* Maximum session per slot cache size */
#define NFSD_SLOT_CACHE_SIZE 1024 #define NFSD_SLOT_CACHE_SIZE 2048
/* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */ /* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */
#define NFSD_CACHE_SIZE_SLOTS_PER_SESSION 32 #define NFSD_CACHE_SIZE_SLOTS_PER_SESSION 32
#define NFSD_MAX_MEM_PER_SESSION \ #define NFSD_MAX_MEM_PER_SESSION \
@ -464,8 +463,6 @@ extern void nfs4_release_reclaim(struct nfsd_net *);
extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir, extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir,
struct nfsd_net *nn); struct nfsd_net *nn);
extern __be32 nfs4_check_open_reclaim(clientid_t *clid, bool sessions, struct nfsd_net *nn); extern __be32 nfs4_check_open_reclaim(clientid_t *clid, bool sessions, struct nfsd_net *nn);
extern void nfs4_free_openowner(struct nfs4_openowner *);
extern void nfs4_free_lockowner(struct nfs4_lockowner *);
extern int set_callback_cred(void); extern int set_callback_cred(void);
extern void nfsd4_init_callback(struct nfsd4_callback *); extern void nfsd4_init_callback(struct nfsd4_callback *);
extern void nfsd4_probe_callback(struct nfs4_client *clp); extern void nfsd4_probe_callback(struct nfs4_client *clp);

View File

@ -24,7 +24,6 @@
#include <linux/seq_file.h> #include <linux/seq_file.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/sunrpc/stats.h> #include <linux/sunrpc/stats.h>
#include <linux/nfsd/stats.h>
#include <net/net_namespace.h> #include <net/net_namespace.h>
#include "nfsd.h" #include "nfsd.h"

View File

@ -1,12 +1,10 @@
/* /*
* linux/include/linux/nfsd/stats.h
*
* Statistics for NFS server. * Statistics for NFS server.
* *
* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
*/ */
#ifndef LINUX_NFSD_STATS_H #ifndef _NFSD_STATS_H
#define LINUX_NFSD_STATS_H #define _NFSD_STATS_H
#include <uapi/linux/nfsd/stats.h> #include <uapi/linux/nfsd/stats.h>
@ -42,4 +40,4 @@ extern struct svc_stat nfsd_svcstats;
void nfsd_stat_init(void); void nfsd_stat_init(void);
void nfsd_stat_shutdown(void); void nfsd_stat_shutdown(void);
#endif /* LINUX_NFSD_STATS_H */ #endif /* _NFSD_STATS_H */

View File

@ -820,55 +820,54 @@ static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
return __splice_from_pipe(pipe, sd, nfsd_splice_actor); return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
} }
static __be32 __be32 nfsd_finish_read(struct file *file, unsigned long *count, int host_err)
nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
{ {
mm_segment_t oldfs; if (host_err >= 0) {
__be32 err; nfsdstats.io_read += host_err;
int host_err; *count = host_err;
fsnotify_access(file);
return 0;
} else
return nfserrno(host_err);
}
err = nfserr_perm; int nfsd_splice_read(struct svc_rqst *rqstp,
struct file *file, loff_t offset, unsigned long *count)
if (file->f_op->splice_read && rqstp->rq_splice_ok) { {
struct splice_desc sd = { struct splice_desc sd = {
.len = 0, .len = 0,
.total_len = *count, .total_len = *count,
.pos = offset, .pos = offset,
.u.data = rqstp, .u.data = rqstp,
}; };
int host_err;
rqstp->rq_next_page = rqstp->rq_respages + 1; rqstp->rq_next_page = rqstp->rq_respages + 1;
host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor); host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
} else { return nfsd_finish_read(file, count, host_err);
}
int nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen,
unsigned long *count)
{
mm_segment_t oldfs;
int host_err;
oldfs = get_fs(); oldfs = get_fs();
set_fs(KERNEL_DS); set_fs(KERNEL_DS);
host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset); host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset);
set_fs(oldfs); set_fs(oldfs);
return nfsd_finish_read(file, count, host_err);
} }
if (host_err >= 0) { static __be32
nfsdstats.io_read += host_err; nfsd_vfs_read(struct svc_rqst *rqstp, struct file *file,
*count = host_err; loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
err = 0;
fsnotify_access(file);
} else
err = nfserrno(host_err);
return err;
}
static void kill_suid(struct dentry *dentry)
{ {
struct iattr ia; if (file->f_op->splice_read && rqstp->rq_splice_ok)
ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; return nfsd_splice_read(rqstp, file, offset, count);
else
mutex_lock(&dentry->d_inode->i_mutex); return nfsd_readv(file, offset, vec, vlen, count);
/*
* Note we call this on write, so notify_change will not
* encounter any conflicting delegations:
*/
notify_change(dentry, &ia, NULL);
mutex_unlock(&dentry->d_inode->i_mutex);
} }
/* /*
@ -922,6 +921,16 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
int stable = *stablep; int stable = *stablep;
int use_wgather; int use_wgather;
loff_t pos = offset; loff_t pos = offset;
unsigned int pflags = current->flags;
if (rqstp->rq_local)
/*
* We want less throttling in balance_dirty_pages()
* and shrink_inactive_list() so that nfs to
* localhost doesn't cause nfsd to lock up due to all
* the client's dirty pages or its congested queue.
*/
current->flags |= PF_LESS_THROTTLE;
dentry = file->f_path.dentry; dentry = file->f_path.dentry;
inode = dentry->d_inode; inode = dentry->d_inode;
@ -942,10 +951,6 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
nfsdstats.io_write += host_err; nfsdstats.io_write += host_err;
fsnotify_modify(file); fsnotify_modify(file);
/* clear setuid/setgid flag after write */
if (inode->i_mode & (S_ISUID | S_ISGID))
kill_suid(dentry);
if (stable) { if (stable) {
if (use_wgather) if (use_wgather)
host_err = wait_for_concurrent_writes(file); host_err = wait_for_concurrent_writes(file);
@ -959,9 +964,45 @@ out_nfserr:
err = 0; err = 0;
else else
err = nfserrno(host_err); err = nfserrno(host_err);
if (rqstp->rq_local)
tsk_restore_flags(current, pflags, PF_LESS_THROTTLE);
return err; return err;
} }
/*
 * Open the file behind @fhp for reading and look up its readahead state.
 *
 * The file is opened through nfsd_open() as a regular file (S_IFREG) with
 * NFSD_MAY_READ; any error from that call is returned unchanged and
 * neither *file's readahead state nor *ra is touched.  On success *ra
 * receives whatever nfsd_get_raparms() returns for the inode's
 * (s_dev, i_ino) pair, and if that entry exists and has p_set, its saved
 * p_ra is copied into the open file's f_ra.
 *
 * Returns nfs_ok on success.  nfsd_put_tmp_read_open() is the matching
 * release call.
 */
__be32 nfsd_get_tmp_read_open(struct svc_rqst *rqstp, struct svc_fh *fhp,
		struct file **file, struct raparms **ra)
{
	struct raparms *parms;
	struct inode *ino;
	__be32 status;

	status = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, file);
	if (status)
		return status;

	ino = file_inode(*file);

	/* Fetch cached readahead parameters and seed the file with them. */
	parms = nfsd_get_raparms(ino->i_sb->s_dev, ino->i_ino);
	*ra = parms;
	if (parms && parms->p_set)
		(*file)->f_ra = parms->p_ra;

	return nfs_ok;
}
/*
 * Release a file obtained from nfsd_get_tmp_read_open().
 *
 * When @ra is non-NULL, the file's current f_ra is stored back into it,
 * p_set is raised and p_count is decremented, all under the pb_lock of
 * the raparm_hash bucket selected by ra->p_hindex.  The file is then
 * closed with nfsd_close() regardless of whether @ra was supplied.
 */
void nfsd_put_tmp_read_open(struct file *file, struct raparms *ra)
{
	struct raparm_hbucket *bucket;

	if (!ra)
		goto close;

	/* Write back readahead params */
	bucket = &raparm_hash[ra->p_hindex];
	spin_lock(&bucket->pb_lock);
	ra->p_ra = file->f_ra;
	ra->p_set = 1;
	/* presumably drops the reference taken by nfsd_get_raparms() -- confirm */
	ra->p_count--;
	spin_unlock(&bucket->pb_lock);

close:
	nfsd_close(file);
}
/* /*
* Read data from a file. count must contain the requested read count * Read data from a file. count must contain the requested read count
* on entry. On return, *count contains the number of bytes actually read. * on entry. On return, *count contains the number of bytes actually read.
@ -971,55 +1012,17 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
loff_t offset, struct kvec *vec, int vlen, unsigned long *count) loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
{ {
struct file *file; struct file *file;
struct inode *inode;
struct raparms *ra; struct raparms *ra;
__be32 err; __be32 err;
err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file); err = nfsd_get_tmp_read_open(rqstp, fhp, &file, &ra);
if (err) if (err)
return err; return err;
inode = file_inode(file); err = nfsd_vfs_read(rqstp, file, offset, vec, vlen, count);
/* Get readahead parameters */ nfsd_put_tmp_read_open(file, ra);
ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino);
if (ra && ra->p_set)
file->f_ra = ra->p_ra;
err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
/* Write back readahead params */
if (ra) {
struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
spin_lock(&rab->pb_lock);
ra->p_ra = file->f_ra;
ra->p_set = 1;
ra->p_count--;
spin_unlock(&rab->pb_lock);
}
nfsd_close(file);
return err;
}
/* As above, but use the provided file descriptor. */
__be32
nfsd_read_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
loff_t offset, struct kvec *vec, int vlen,
unsigned long *count)
{
__be32 err;
if (file) {
err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
NFSD_MAY_READ|NFSD_MAY_OWNER_OVERRIDE);
if (err)
goto out;
err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
} else /* Note file may still be NULL in NFSv4 special stateid case: */
err = nfsd_read(rqstp, fhp, offset, vec, vlen, count);
out:
return err; return err;
} }

View File

@ -70,10 +70,16 @@ __be32 nfsd_commit(struct svc_rqst *, struct svc_fh *,
__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t, __be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t,
int, struct file **); int, struct file **);
void nfsd_close(struct file *); void nfsd_close(struct file *);
struct raparms;
__be32 nfsd_get_tmp_read_open(struct svc_rqst *, struct svc_fh *,
struct file **, struct raparms **);
void nfsd_put_tmp_read_open(struct file *, struct raparms *);
int nfsd_splice_read(struct svc_rqst *,
struct file *, loff_t, unsigned long *);
int nfsd_readv(struct file *, loff_t, struct kvec *, int,
unsigned long *);
__be32 nfsd_read(struct svc_rqst *, struct svc_fh *, __be32 nfsd_read(struct svc_rqst *, struct svc_fh *,
loff_t, struct kvec *, int, unsigned long *); loff_t, struct kvec *, int, unsigned long *);
__be32 nfsd_read_file(struct svc_rqst *, struct svc_fh *, struct file *,
loff_t, struct kvec *, int, unsigned long *);
__be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *, __be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *,
loff_t, struct kvec *,int, unsigned long *, int *); loff_t, struct kvec *,int, unsigned long *, int *);
__be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *, __be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *,

View File

@ -58,7 +58,7 @@ struct nfsd4_compound_state {
/* For sessions DRC */ /* For sessions DRC */
struct nfsd4_session *session; struct nfsd4_session *session;
struct nfsd4_slot *slot; struct nfsd4_slot *slot;
__be32 *datap; int data_offset;
size_t iovlen; size_t iovlen;
u32 minorversion; u32 minorversion;
__be32 status; __be32 status;
@ -287,9 +287,8 @@ struct nfsd4_readdir {
struct svc_fh * rd_fhp; /* response */ struct svc_fh * rd_fhp; /* response */
struct readdir_cd common; struct readdir_cd common;
__be32 * buffer; struct xdr_stream *xdr;
int buflen; int cookie_offset;
__be32 * offset;
}; };
struct nfsd4_release_lockowner { struct nfsd4_release_lockowner {
@ -506,9 +505,7 @@ struct nfsd4_compoundargs {
struct nfsd4_compoundres { struct nfsd4_compoundres {
/* scratch variables for XDR encode */ /* scratch variables for XDR encode */
__be32 * p; struct xdr_stream xdr;
__be32 * end;
struct xdr_buf * xbuf;
struct svc_rqst * rqstp; struct svc_rqst * rqstp;
u32 taglen; u32 taglen;
@ -538,6 +535,9 @@ static inline bool nfsd4_last_compound_op(struct svc_rqst *rqstp)
return argp->opcnt == resp->opcnt; return argp->opcnt == resp->opcnt;
} }
int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op);
void warn_on_nonidempotent_op(struct nfsd4_op *op);
#define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs) #define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs)
static inline void static inline void
@ -563,9 +563,10 @@ int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *,
struct nfsd4_compoundres *); struct nfsd4_compoundres *);
__be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32); __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32);
void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *); void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *);
void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op); void nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op);
__be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, __be32 nfsd4_encode_fattr_to_buf(__be32 **p, int words,
struct dentry *dentry, __be32 **buffer, int countp, struct svc_fh *fhp, struct svc_export *exp,
struct dentry *dentry,
u32 *bmval, struct svc_rqst *, int ignore_crossmnt); u32 *bmval, struct svc_rqst *, int ignore_crossmnt);
extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp, extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp,
struct nfsd4_compound_state *, struct nfsd4_compound_state *,

View File

@ -17,13 +17,13 @@
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/kref.h> #include <linux/kref.h>
#include <linux/utsname.h> #include <linux/utsname.h>
#include <linux/nfsd/nfsfh.h>
#include <linux/lockd/bind.h> #include <linux/lockd/bind.h>
#include <linux/lockd/xdr.h> #include <linux/lockd/xdr.h>
#ifdef CONFIG_LOCKD_V4 #ifdef CONFIG_LOCKD_V4
#include <linux/lockd/xdr4.h> #include <linux/lockd/xdr4.h>
#endif #endif
#include <linux/lockd/debug.h> #include <linux/lockd/debug.h>
#include <linux/sunrpc/svc.h>
/* /*
* Version string * Version string

View File

@ -399,8 +399,6 @@ enum lock_type4 {
#define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1) #define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1)
#define FATTR4_WORD2_MDSTHRESHOLD (1UL << 4) #define FATTR4_WORD2_MDSTHRESHOLD (1UL << 4)
#define FATTR4_WORD2_SECURITY_LABEL (1UL << 16) #define FATTR4_WORD2_SECURITY_LABEL (1UL << 16)
#define FATTR4_WORD2_CHANGE_SECURITY_LABEL \
(1UL << 17)
/* MDS threshold bitmap bits */ /* MDS threshold bitmap bits */
#define THRESHOLD_RD (1UL << 0) #define THRESHOLD_RD (1UL << 0)

View File

@ -1,19 +0,0 @@
/*
* linux/include/linux/nfsd/debug.h
*
* Debugging-related stuff for nfsd
*
* Copyright (C) 1995 Olaf Kirch <okir@monad.swb.de>
*/
#ifndef LINUX_NFSD_DEBUG_H
#define LINUX_NFSD_DEBUG_H
#include <uapi/linux/nfsd/debug.h>
# undef ifdebug
# ifdef NFSD_DEBUG
# define ifdebug(flag) if (nfsd_debug & NFSDDBG_##flag)
# else
# define ifdebug(flag) if (0)
# endif
#endif /* LINUX_NFSD_DEBUG_H */

View File

@ -1,63 +0,0 @@
/*
* include/linux/nfsd/nfsfh.h
*
* This file describes the layout of the file handles as passed
* over the wire.
*
* Earlier versions of knfsd used to sign file handles using keyed MD5
* or SHA. I've removed this code, because it doesn't give you more
* security than blocking external access to port 2049 on your firewall.
*
* Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
*/
#ifndef _LINUX_NFSD_FH_H
#define _LINUX_NFSD_FH_H
# include <linux/sunrpc/svc.h>
#include <uapi/linux/nfsd/nfsfh.h>
static inline __u32 ino_t_to_u32(ino_t ino)
{
return (__u32) ino;
}
/* Convert a __u32 value back to ino_t with a plain cast. */
static inline ino_t u32_to_ino_t(__u32 uino)
{
	ino_t widened = (ino_t) uino;

	return widened;
}
/*
* This is the internal representation of an NFS handle used in knfsd.
* The fh_pre_* and fh_post_* members hold attributes saved around an
* operation (see the per-field comments below); they are used to support
* wcc_attr's in NFSv3.
*/
typedef struct svc_fh {
struct knfsd_fh fh_handle; /* FH data */
struct dentry * fh_dentry; /* validated dentry */
struct svc_export * fh_export; /* export pointer */
int fh_maxsize; /* max size for fh_handle */
unsigned char fh_locked; /* inode locked by us */
unsigned char fh_want_write; /* remount protection taken */
/* Everything from here to the matching #endif exists only with CONFIG_NFSD_V3. */
#ifdef CONFIG_NFSD_V3
unsigned char fh_post_saved; /* post-op attrs saved */
unsigned char fh_pre_saved; /* pre-op attrs saved */
/* Pre-op attributes saved during fh_lock */
__u64 fh_pre_size; /* size before operation */
struct timespec fh_pre_mtime; /* mtime before oper */
struct timespec fh_pre_ctime; /* ctime before oper */
/*
* pre-op nfsv4 change attr: note must check IS_I_VERSION(inode)
* to find out if it is valid.
*/
u64 fh_pre_change;
/* Post-op attributes saved in fh_unlock */
struct kstat fh_post_attr; /* full attrs after operation */
u64 fh_post_change; /* nfsv4 change; see above */
#endif /* CONFIG_NFSD_V3 */
} svc_fh;
#endif /* _LINUX_NFSD_FH_H */

View File

@ -244,6 +244,7 @@ struct svc_rqst {
struct page * rq_pages[RPCSVC_MAXPAGES]; struct page * rq_pages[RPCSVC_MAXPAGES];
struct page * *rq_respages; /* points into rq_pages */ struct page * *rq_respages; /* points into rq_pages */
struct page * *rq_next_page; /* next reply page to use */ struct page * *rq_next_page; /* next reply page to use */
struct page * *rq_page_end; /* one past the last page */
struct kvec rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */ struct kvec rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */
@ -254,11 +255,15 @@ struct svc_rqst {
u32 rq_prot; /* IP protocol */ u32 rq_prot; /* IP protocol */
unsigned short unsigned short
rq_secure : 1; /* secure port */ rq_secure : 1; /* secure port */
unsigned short rq_local : 1; /* local request */
void * rq_argp; /* decoded arguments */ void * rq_argp; /* decoded arguments */
void * rq_resp; /* xdr'd results */ void * rq_resp; /* xdr'd results */
void * rq_auth_data; /* flavor-specific data */ void * rq_auth_data; /* flavor-specific data */
int rq_auth_slack; /* extra space xdr code
* should leave in head
* for krb5i, krb5p.
*/
int rq_reserved; /* space on socket outq int rq_reserved; /* space on socket outq
* reserved for this request * reserved for this request
*/ */
@ -454,11 +459,7 @@ char * svc_print_addr(struct svc_rqst *, char *, size_t);
*/ */
static inline void svc_reserve_auth(struct svc_rqst *rqstp, int space) static inline void svc_reserve_auth(struct svc_rqst *rqstp, int space)
{ {
int added_space = 0; svc_reserve(rqstp, space + rqstp->rq_auth_slack);
if (rqstp->rq_authop->flavour)
added_space = RPC_MAX_AUTH_SIZE;
svc_reserve(rqstp, space + added_space);
} }
#endif /* SUNRPC_SVC_H */ #endif /* SUNRPC_SVC_H */

View File

@ -115,14 +115,13 @@ struct svc_rdma_fastreg_mr {
struct list_head frmr_list; struct list_head frmr_list;
}; };
struct svc_rdma_req_map { struct svc_rdma_req_map {
struct svc_rdma_fastreg_mr *frmr;
unsigned long count; unsigned long count;
union { union {
struct kvec sge[RPCSVC_MAXPAGES]; struct kvec sge[RPCSVC_MAXPAGES];
struct svc_rdma_chunk_sge ch[RPCSVC_MAXPAGES]; struct svc_rdma_chunk_sge ch[RPCSVC_MAXPAGES];
unsigned long lkey[RPCSVC_MAXPAGES];
}; };
}; };
#define RDMACTXT_F_FAST_UNREG 1
#define RDMACTXT_F_LAST_CTXT 2 #define RDMACTXT_F_LAST_CTXT 2
#define SVCRDMA_DEVCAP_FAST_REG 1 /* fast mr registration */ #define SVCRDMA_DEVCAP_FAST_REG 1 /* fast mr registration */

View File

@ -24,6 +24,7 @@ struct svc_xprt_ops {
void (*xpo_release_rqst)(struct svc_rqst *); void (*xpo_release_rqst)(struct svc_rqst *);
void (*xpo_detach)(struct svc_xprt *); void (*xpo_detach)(struct svc_xprt *);
void (*xpo_free)(struct svc_xprt *); void (*xpo_free)(struct svc_xprt *);
int (*xpo_secure_port)(struct svc_rqst *);
}; };
struct svc_xprt_class { struct svc_xprt_class {
@ -63,6 +64,7 @@ struct svc_xprt {
#define XPT_DETACHED 10 /* detached from tempsocks list */ #define XPT_DETACHED 10 /* detached from tempsocks list */
#define XPT_LISTENER 11 /* listening endpoint */ #define XPT_LISTENER 11 /* listening endpoint */
#define XPT_CACHE_AUTH 12 /* cache auth info */ #define XPT_CACHE_AUTH 12 /* cache auth info */
#define XPT_LOCAL 13 /* connection from loopback interface */
struct svc_serv *xpt_server; /* service for transport */ struct svc_serv *xpt_server; /* service for transport */
atomic_t xpt_reserved; /* space on outq that is rsvd */ atomic_t xpt_reserved; /* space on outq that is rsvd */

View File

@ -215,6 +215,9 @@ typedef int (*kxdrdproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj);
extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p); extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
extern void xdr_commit_encode(struct xdr_stream *xdr);
extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len);
extern int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen);
extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
unsigned int base, unsigned int len); unsigned int base, unsigned int len);
extern unsigned int xdr_stream_pos(const struct xdr_stream *xdr); extern unsigned int xdr_stream_pos(const struct xdr_stream *xdr);

View File

@ -1,13 +1,7 @@
/* /*
* include/linux/nfsd/nfsfh.h
*
* This file describes the layout of the file handles as passed * This file describes the layout of the file handles as passed
* over the wire. * over the wire.
* *
* Earlier versions of knfsd used to sign file handles using keyed MD5
* or SHA. I've removed this code, because it doesn't give you more
* security than blocking external access to port 2049 on your firewall.
*
* Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
*/ */
@ -37,7 +31,7 @@ struct nfs_fhbase_old {
}; };
/* /*
* This is the new flexible, extensible style NFSv2/v3 file handle. * This is the new flexible, extensible style NFSv2/v3/v4 file handle.
* by Neil Brown <neilb@cse.unsw.edu.au> - March 2000 * by Neil Brown <neilb@cse.unsw.edu.au> - March 2000
* *
* The file handle starts with a sequence of four-byte words. * The file handle starts with a sequence of four-byte words.
@ -47,14 +41,7 @@ struct nfs_fhbase_old {
* *
* All four-byte values are in host-byte-order. * All four-byte values are in host-byte-order.
* *
* The auth_type field specifies how the filehandle can be authenticated * The auth_type field is deprecated and must be set to 0.
* This might allow a file to be confirmed to be in a writable part of a
* filetree without checking the path from it up to the root.
* Current values:
* 0 - No authentication. fb_auth is 0 bytes long
* Possible future values:
* 1 - 4 bytes taken from MD5 hash of the remainder of the file handle
* prefixed by a secret and with the important export flags.
* *
* The fsid_type identifies how the filesystem (or export point) is * The fsid_type identifies how the filesystem (or export point) is
* encoded. * encoded.
@ -71,14 +58,9 @@ struct nfs_fhbase_old {
* 7 - 8 byte inode number and 16 byte uuid * 7 - 8 byte inode number and 16 byte uuid
* *
* The fileid_type identified how the file within the filesystem is encoded. * The fileid_type identified how the file within the filesystem is encoded.
* This is (will be) passed to, and set by, the underlying filesystem if it supports * The values for this field are filesystem specific, except that
* filehandle operations. The filesystem must not use the value '0' or '0xff' and may * filesystems must not use the values '0' or '0xff'. See 'enum fid_type'
* only use the values 1 and 2 as defined below: * in include/linux/exportfs.h for currently registered values.
* Current values:
* 0 - The root, or export point, of the filesystem. fb_fileid is 0 bytes.
* 1 - 32bit inode number, 32 bit generation number.
* 2 - 32bit inode number, 32 bit generation number, 32 bit parent directory inode number.
*
*/ */
struct nfs_fhbase_new { struct nfs_fhbase_new {
__u8 fb_version; /* == 1, even => nfs_fhbase_old */ __u8 fb_version; /* == 1, even => nfs_fhbase_old */
@ -114,9 +96,9 @@ struct knfsd_fh {
#define fh_fsid_type fh_base.fh_new.fb_fsid_type #define fh_fsid_type fh_base.fh_new.fb_fsid_type
#define fh_auth_type fh_base.fh_new.fb_auth_type #define fh_auth_type fh_base.fh_new.fb_auth_type
#define fh_fileid_type fh_base.fh_new.fb_fileid_type #define fh_fileid_type fh_base.fh_new.fb_fileid_type
#define fh_auth fh_base.fh_new.fb_auth
#define fh_fsid fh_base.fh_new.fb_auth #define fh_fsid fh_base.fh_new.fb_auth
/* Do not use, provided for userspace compatibility. */
#define fh_auth fh_base.fh_new.fb_auth
#endif /* _UAPI_LINUX_NFSD_FH_H */ #endif /* _UAPI_LINUX_NFSD_FH_H */

View File

@ -1503,6 +1503,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
if (unwrap_integ_data(rqstp, &rqstp->rq_arg, if (unwrap_integ_data(rqstp, &rqstp->rq_arg,
gc->gc_seq, rsci->mechctx)) gc->gc_seq, rsci->mechctx))
goto garbage_args; goto garbage_args;
rqstp->rq_auth_slack = RPC_MAX_AUTH_SIZE;
break; break;
case RPC_GSS_SVC_PRIVACY: case RPC_GSS_SVC_PRIVACY:
/* placeholders for length and seq. number: */ /* placeholders for length and seq. number: */
@ -1511,6 +1512,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
if (unwrap_priv_data(rqstp, &rqstp->rq_arg, if (unwrap_priv_data(rqstp, &rqstp->rq_arg,
gc->gc_seq, rsci->mechctx)) gc->gc_seq, rsci->mechctx))
goto garbage_args; goto garbage_args;
rqstp->rq_auth_slack = RPC_MAX_AUTH_SIZE * 2;
break; break;
default: default:
goto auth_err; goto auth_err;

View File

@ -374,7 +374,7 @@ void sunrpc_destroy_cache_detail(struct cache_detail *cd)
} }
return; return;
out: out:
printk(KERN_ERR "nfsd: failed to unregister %s cache\n", cd->name); printk(KERN_ERR "RPC: failed to unregister %s cache\n", cd->name);
} }
EXPORT_SYMBOL_GPL(sunrpc_destroy_cache_detail); EXPORT_SYMBOL_GPL(sunrpc_destroy_cache_detail);

View File

@ -43,6 +43,19 @@ static inline int rpc_reply_expected(struct rpc_task *task)
(task->tk_msg.rpc_proc->p_decode != NULL); (task->tk_msg.rpc_proc->p_decode != NULL);
} }
/*
 * Return 1 when the socket has a cached destination entry whose device
 * has NETIF_F_LOOPBACK set in its features, and 0 otherwise.  The
 * destination entry is looked up and examined under rcu_read_lock().
 */
static inline int sock_is_loopback(struct sock *sk)
{
	struct dst_entry *route;
	int is_local;

	rcu_read_lock();
	route = rcu_dereference(sk->sk_dst_cache);
	is_local = route && route->dev &&
		   (route->dev->features & NETIF_F_LOOPBACK);
	rcu_read_unlock();

	return is_local;
}
int svc_send_common(struct socket *sock, struct xdr_buf *xdr, int svc_send_common(struct socket *sock, struct xdr_buf *xdr,
struct page *headpage, unsigned long headoffset, struct page *headpage, unsigned long headoffset,
struct page *tailpage, unsigned long tailoffset); struct page *tailpage, unsigned long tailoffset);

View File

@ -597,6 +597,7 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
} }
rqstp->rq_pages[i] = p; rqstp->rq_pages[i] = p;
} }
rqstp->rq_page_end = &rqstp->rq_pages[i];
rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */ rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */
/* Make arg->head point to first page and arg->pages point to rest */ /* Make arg->head point to first page and arg->pages point to rest */
@ -730,6 +731,8 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
newxpt = xprt->xpt_ops->xpo_accept(xprt); newxpt = xprt->xpt_ops->xpo_accept(xprt);
if (newxpt) if (newxpt)
svc_add_new_temp_xprt(serv, newxpt); svc_add_new_temp_xprt(serv, newxpt);
else
module_put(xprt->xpt_class->xcl_owner);
} else if (xprt->xpt_ops->xpo_has_wspace(xprt)) { } else if (xprt->xpt_ops->xpo_has_wspace(xprt)) {
/* XPT_DATA|XPT_DEFERRED case: */ /* XPT_DATA|XPT_DEFERRED case: */
dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
@ -793,7 +796,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
clear_bit(XPT_OLD, &xprt->xpt_flags); clear_bit(XPT_OLD, &xprt->xpt_flags);
rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp)); rqstp->rq_secure = xprt->xpt_ops->xpo_secure_port(rqstp);
rqstp->rq_chandle.defer = svc_defer; rqstp->rq_chandle.defer = svc_defer;
if (serv->sv_stats) if (serv->sv_stats)

View File

@ -54,6 +54,8 @@ svc_authenticate(struct svc_rqst *rqstp, __be32 *authp)
} }
spin_unlock(&authtab_lock); spin_unlock(&authtab_lock);
rqstp->rq_auth_slack = 0;
rqstp->rq_authop = aops; rqstp->rq_authop = aops;
return aops->accept(rqstp, authp); return aops->accept(rqstp, authp);
} }

View File

@ -400,6 +400,12 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd,
release_sock(sock->sk); release_sock(sock->sk);
#endif #endif
} }
/*
 * Socket-transport implementation of the xpo_secure_port operation:
 * returns whatever svc_port_is_privileged() reports for the address
 * that svc_addr() yields for this request.
 */
static int svc_sock_secure_port(struct svc_rqst *rqstp)
{
return svc_port_is_privileged(svc_addr(rqstp));
}
/* /*
* INET callback when data has been received on the socket. * INET callback when data has been received on the socket.
*/ */
@ -678,6 +684,7 @@ static struct svc_xprt_ops svc_udp_ops = {
.xpo_prep_reply_hdr = svc_udp_prep_reply_hdr, .xpo_prep_reply_hdr = svc_udp_prep_reply_hdr,
.xpo_has_wspace = svc_udp_has_wspace, .xpo_has_wspace = svc_udp_has_wspace,
.xpo_accept = svc_udp_accept, .xpo_accept = svc_udp_accept,
.xpo_secure_port = svc_sock_secure_port,
}; };
static struct svc_xprt_class svc_udp_class = { static struct svc_xprt_class svc_udp_class = {
@ -842,8 +849,7 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
* tell us anything. For now just warn about unpriv connections. * tell us anything. For now just warn about unpriv connections.
*/ */
if (!svc_port_is_privileged(sin)) { if (!svc_port_is_privileged(sin)) {
dprintk(KERN_WARNING dprintk("%s: connect from unprivileged port: %s\n",
"%s: connect from unprivileged port: %s\n",
serv->sv_name, serv->sv_name,
__svc_print_addr(sin, buf, sizeof(buf))); __svc_print_addr(sin, buf, sizeof(buf)));
} }
@ -867,6 +873,10 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
} }
svc_xprt_set_local(&newsvsk->sk_xprt, sin, slen); svc_xprt_set_local(&newsvsk->sk_xprt, sin, slen);
if (sock_is_loopback(newsock->sk))
set_bit(XPT_LOCAL, &newsvsk->sk_xprt.xpt_flags);
else
clear_bit(XPT_LOCAL, &newsvsk->sk_xprt.xpt_flags);
if (serv->sv_stats) if (serv->sv_stats)
serv->sv_stats->nettcpconn++; serv->sv_stats->nettcpconn++;
@ -1112,6 +1122,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
rqstp->rq_xprt_ctxt = NULL; rqstp->rq_xprt_ctxt = NULL;
rqstp->rq_prot = IPPROTO_TCP; rqstp->rq_prot = IPPROTO_TCP;
rqstp->rq_local = !!test_bit(XPT_LOCAL, &svsk->sk_xprt.xpt_flags);
p = (__be32 *)rqstp->rq_arg.head[0].iov_base; p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
calldir = p[1]; calldir = p[1];
@ -1234,6 +1245,7 @@ static struct svc_xprt_ops svc_tcp_bc_ops = {
.xpo_detach = svc_bc_tcp_sock_detach, .xpo_detach = svc_bc_tcp_sock_detach,
.xpo_free = svc_bc_sock_free, .xpo_free = svc_bc_sock_free,
.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr, .xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
.xpo_secure_port = svc_sock_secure_port,
}; };
static struct svc_xprt_class svc_tcp_bc_class = { static struct svc_xprt_class svc_tcp_bc_class = {
@ -1272,6 +1284,7 @@ static struct svc_xprt_ops svc_tcp_ops = {
.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr, .xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
.xpo_has_wspace = svc_tcp_has_wspace, .xpo_has_wspace = svc_tcp_has_wspace,
.xpo_accept = svc_tcp_accept, .xpo_accept = svc_tcp_accept,
.xpo_secure_port = svc_sock_secure_port,
}; };
static struct svc_xprt_class svc_tcp_class = { static struct svc_xprt_class svc_tcp_class = {

View File

@ -462,6 +462,7 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
struct kvec *iov = buf->head; struct kvec *iov = buf->head;
int scratch_len = buf->buflen - buf->page_len - buf->tail[0].iov_len; int scratch_len = buf->buflen - buf->page_len - buf->tail[0].iov_len;
xdr_set_scratch_buffer(xdr, NULL, 0);
BUG_ON(scratch_len < 0); BUG_ON(scratch_len < 0);
xdr->buf = buf; xdr->buf = buf;
xdr->iov = iov; xdr->iov = iov;
@ -481,6 +482,73 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
} }
EXPORT_SYMBOL_GPL(xdr_init_encode); EXPORT_SYMBOL_GPL(xdr_init_encode);
/**
 * xdr_commit_encode - move a page-straddling encode into its final place
 * @xdr: pointer to xdr_stream
 *
 * An encode that crosses a page boundary is first written contiguously
 * at the start of the next page; the number of bytes that really belong
 * at the end of the previous buffer is remembered in xdr->scratch.
 * This function copies those leading bytes back to the remembered
 * location and slides the rest of the data down to the start of the
 * page.  It does nothing when no such encode is pending.
 *
 * xdr_reserve_space() calls this itself before handing out more space,
 * so callers normally only need it once encoding is finished, or at any
 * other point where the xdr_buf contents are about to be read.
 */
void xdr_commit_encode(struct xdr_stream *xdr)
{
	int pending = xdr->scratch.iov_len;
	void *cur_page;

	if (!pending)
		return;

	cur_page = page_address(*xdr->page_ptr);
	/* leading bytes go to the gap left at the end of the previous buffer */
	memcpy(xdr->scratch.iov_base, cur_page, pending);
	/* the remainder moves down so it begins at the top of this page */
	memmove(cur_page, cur_page + pending, (void *)xdr->p - cur_page);
	xdr->scratch.iov_len = 0;
}
EXPORT_SYMBOL_GPL(xdr_commit_encode);
/*
 * Move encoding on to the next page when the current buffer cannot take
 * @nbytes more bytes.
 *
 * The unused bytes at the end of the current buffer (frag1bytes) are
 * accounted to it immediately and recorded in xdr->scratch; the caller
 * then writes all @nbytes contiguously at the start of the next page and
 * xdr_commit_encode() later copies the first frag1bytes back.
 *
 * Returns a pointer to the start of the next page, or NULL when @nbytes
 * is larger than PAGE_SIZE or the xdr_buf has no room for @nbytes more.
 */
__be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, size_t nbytes)
{
	/*
	 * Must be an automatic variable: a static local would be a single
	 * slot shared by every stream being encoded at the same time.
	 */
	__be32 *p;
	int space_left;
	int frag1bytes, frag2bytes;

	if (nbytes > PAGE_SIZE)
		return NULL; /* Bigger buffers require special handling */
	if (xdr->buf->len + nbytes > xdr->buf->buflen)
		return NULL; /* Sorry, we're totally out of space */

	/* xdr->end and xdr->p are __be32 pointers: convert words to bytes */
	frag1bytes = (xdr->end - xdr->p) << 2;
	frag2bytes = nbytes - frag1bytes;
	if (xdr->iov)
		xdr->iov->iov_len += frag1bytes;
	else
		xdr->buf->page_len += frag1bytes;
	xdr->page_ptr++;
	xdr->iov = NULL;
	/*
	 * If the last encode didn't end exactly on a page boundary, the
	 * next one will straddle boundaries.  Encode into the next
	 * page, then copy it back later in xdr_commit_encode.  We use
	 * the "scratch" iov to track any temporarily unused fragment of
	 * space at the end of the previous buffer:
	 */
	xdr->scratch.iov_base = xdr->p;
	xdr->scratch.iov_len = frag1bytes;
	p = page_address(*xdr->page_ptr);
	/*
	 * Note this is where the next encode will start after we've
	 * shifted this one back:
	 */
	xdr->p = (void *)p + frag2bytes;
	space_left = xdr->buf->buflen - xdr->buf->len;
	xdr->end = (void *)p + min_t(int, space_left, PAGE_SIZE);
	xdr->buf->page_len += frag2bytes;
	xdr->buf->len += nbytes;
	return p;
}
/** /**
* xdr_reserve_space - Reserve buffer space for sending * xdr_reserve_space - Reserve buffer space for sending
* @xdr: pointer to xdr_stream * @xdr: pointer to xdr_stream
@ -495,19 +563,121 @@ __be32 * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes)
__be32 *p = xdr->p; __be32 *p = xdr->p;
__be32 *q; __be32 *q;
xdr_commit_encode(xdr);
/* align nbytes on the next 32-bit boundary */ /* align nbytes on the next 32-bit boundary */
nbytes += 3; nbytes += 3;
nbytes &= ~3; nbytes &= ~3;
q = p + (nbytes >> 2); q = p + (nbytes >> 2);
if (unlikely(q > xdr->end || q < p)) if (unlikely(q > xdr->end || q < p))
return NULL; return xdr_get_next_encode_buffer(xdr, nbytes);
xdr->p = q; xdr->p = q;
if (xdr->iov)
xdr->iov->iov_len += nbytes; xdr->iov->iov_len += nbytes;
else
xdr->buf->page_len += nbytes;
xdr->buf->len += nbytes; xdr->buf->len += nbytes;
return p; return p;
} }
EXPORT_SYMBOL_GPL(xdr_reserve_space); EXPORT_SYMBOL_GPL(xdr_reserve_space);
/**
* xdr_truncate_encode - truncate an encode buffer
* @xdr: pointer to xdr_stream
* @len: new length of buffer
*
* Truncates the xdr stream, so that xdr->buf->len == len,
* and xdr->p points at offset len from the start of the buffer, and
* head, tail, and page lengths are adjusted to correspond.
*
* If this means moving xdr->p to a different buffer, we assume that
* the end pointer should be set to the end of the current page,
* except in the case of the head buffer when we assume the head
* buffer's current length represents the end of the available buffer.
*
* This is *not* safe to use on a buffer that already has inlined page
* cache pages (as in a zero-copy server read reply), except for the
* simple case of truncating from one position in the tail to another.
*
* A @len larger than the current buffer length triggers a one-time
* warning and leaves the stream unchanged.
*/
void xdr_truncate_encode(struct xdr_stream *xdr, size_t len)
{
struct xdr_buf *buf = xdr->buf;
struct kvec *head = buf->head;
struct kvec *tail = buf->tail;
int fraglen;
int new, old;
if (len > buf->len) {
WARN_ON_ONCE(1);
return;
}
/* Put any pending page-straddling encode in place before moving pointers. */
xdr_commit_encode(xdr);
/* Step 1: remove as much as needed (at most all) of the tail. */
fraglen = min_t(int, buf->len - len, tail->iov_len);
tail->iov_len -= fraglen;
buf->len -= fraglen;
if (tail->iov_len && buf->len == len) {
/* The new end still lies inside the tail. */
xdr->p = tail->iov_base + tail->iov_len;
/* xdr->end, xdr->iov should be set already */
return;
}
/* Warn once if tail bytes were dropped and the truncation continues past the tail. */
WARN_ON_ONCE(fraglen);
/* Step 2: remove as much as needed (at most all) of the page data. */
fraglen = min_t(int, buf->len - len, buf->page_len);
buf->page_len -= fraglen;
buf->len -= fraglen;
/* new/old: end offset of the page data after/before this step. */
new = buf->page_base + buf->page_len;
old = new + fraglen;
/* Step page_ptr back by the number of page boundaries crossed. */
xdr->page_ptr -= (old >> PAGE_SHIFT) - (new >> PAGE_SHIFT);
if (buf->page_len && buf->len == len) {
/* The new end lies inside the page data. */
xdr->p = page_address(*xdr->page_ptr);
xdr->end = (void *)xdr->p + PAGE_SIZE;
xdr->p = (void *)xdr->p + (new % PAGE_SIZE);
/* xdr->iov should already be NULL */
return;
}
/* Step 3: the new end lies in the head. */
if (fraglen) {
/* Page data was dropped, so encoding moves back from the pages to the head. */
xdr->end = head->iov_base + head->iov_len;
xdr->page_ptr--;
}
/* (otherwise assume xdr->end is already set) */
head->iov_len = len;
buf->len = len;
xdr->p = head->iov_base + head->iov_len;
xdr->iov = buf->head;
}
EXPORT_SYMBOL(xdr_truncate_encode);
/**
 * xdr_restrict_buflen - shrink the space an encoder may still use
 * @xdr: pointer to xdr_stream
 * @newbuflen: new upper bound, in bytes, on the buffer
 *
 * Returns -1 if @newbuflen is negative or smaller than what has already
 * been encoded.  Returns 0 without changing anything if the buffer is
 * already limited to less than @newbuflen.  Otherwise sets
 * xdr->buf->buflen to @newbuflen, pulls xdr->end back if it currently
 * lies beyond offset @newbuflen from the start of the buffer, and
 * returns 0.
 */
int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen)
{
	struct xdr_buf *buf = xdr->buf;
	int remaining = (void *)xdr->end - (void *)xdr->p;
	int cur_end = buf->len + remaining;

	if (newbuflen < 0 || newbuflen < buf->len)
		return -1;

	if (newbuflen <= buf->buflen) {
		if (cur_end > newbuflen)
			xdr->end = (void *)xdr->end + newbuflen - cur_end;
		buf->buflen = newbuflen;
	}
	return 0;
}
EXPORT_SYMBOL(xdr_restrict_buflen);
/** /**
* xdr_write_pages - Insert a list of pages into an XDR buffer for sending * xdr_write_pages - Insert a list of pages into an XDR buffer for sending
* @xdr: pointer to xdr_stream * @xdr: pointer to xdr_stream

View File

@ -1,4 +1,5 @@
/* /*
* Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
* Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
* *
* This software is available to you under a choice of one of two * This software is available to you under a choice of one of two
@ -69,7 +70,8 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
/* Set up the XDR head */ /* Set up the XDR head */
rqstp->rq_arg.head[0].iov_base = page_address(page); rqstp->rq_arg.head[0].iov_base = page_address(page);
rqstp->rq_arg.head[0].iov_len = min(byte_count, ctxt->sge[0].length); rqstp->rq_arg.head[0].iov_len =
min_t(size_t, byte_count, ctxt->sge[0].length);
rqstp->rq_arg.len = byte_count; rqstp->rq_arg.len = byte_count;
rqstp->rq_arg.buflen = byte_count; rqstp->rq_arg.buflen = byte_count;
@ -85,7 +87,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
page = ctxt->pages[sge_no]; page = ctxt->pages[sge_no];
put_page(rqstp->rq_pages[sge_no]); put_page(rqstp->rq_pages[sge_no]);
rqstp->rq_pages[sge_no] = page; rqstp->rq_pages[sge_no] = page;
bc -= min(bc, ctxt->sge[sge_no].length); bc -= min_t(u32, bc, ctxt->sge[sge_no].length);
rqstp->rq_arg.buflen += ctxt->sge[sge_no].length; rqstp->rq_arg.buflen += ctxt->sge[sge_no].length;
sge_no++; sge_no++;
} }
@ -113,291 +115,265 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
rqstp->rq_arg.tail[0].iov_len = 0; rqstp->rq_arg.tail[0].iov_len = 0;
} }
/* Encode a read-chunk-list as an array of IB SGE
*
* Assumptions:
* - chunk[0]->position points to pages[0] at an offset of 0
* - pages[] is not physically or virtually contiguous and consists of
* PAGE_SIZE elements.
*
* Output:
* - sge array pointing into pages[] array.
* - chunk_sge array specifying sge index and count for each
* chunk in the read list
*
* Returns the number of SGEs filled in rpl_map. The xprt and ch_count
* arguments are not referenced by the body.
*
*/
static int map_read_chunks(struct svcxprt_rdma *xprt,
struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *head,
struct rpcrdma_msg *rmsgp,
struct svc_rdma_req_map *rpl_map,
struct svc_rdma_req_map *chl_map,
int ch_count,
int byte_count)
{
int sge_no;
int sge_bytes;
int page_off;
int page_no;
int ch_bytes;
int ch_no;
struct rpcrdma_read_chunk *ch;
sge_no = 0;
page_no = 0;
page_off = 0;
ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
ch_no = 0;
ch_bytes = ntohl(ch->rc_target.rs_length);
/* Seed head->arg from the request; the lengths start with the first chunk only. */
head->arg.head[0] = rqstp->rq_arg.head[0];
head->arg.tail[0] = rqstp->rq_arg.tail[0];
head->arg.pages = &head->pages[head->count];
head->hdr_count = head->count; /* save count of hdr pages */
head->arg.page_base = 0;
head->arg.page_len = ch_bytes;
head->arg.len = rqstp->rq_arg.len + ch_bytes;
head->arg.buflen = rqstp->rq_arg.buflen + ch_bytes;
head->count++;
chl_map->ch[0].start = 0;
/* Each pass emits one SGE, bounded by the end of the page and of the chunk. */
while (byte_count) {
rpl_map->sge[sge_no].iov_base =
page_address(rqstp->rq_arg.pages[page_no]) + page_off;
sge_bytes = min_t(int, PAGE_SIZE-page_off, ch_bytes);
rpl_map->sge[sge_no].iov_len = sge_bytes;
/*
* Don't bump head->count here because the same page
* may be used by multiple SGE.
*/
head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no];
rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1];
rqstp->rq_next_page = rqstp->rq_respages + 1;
byte_count -= sge_bytes;
ch_bytes -= sge_bytes;
sge_no++;
/*
* If all bytes for this chunk have been mapped to an
* SGE, move to the next chunk
*/
if (ch_bytes == 0) {
chl_map->ch[ch_no].count =
sge_no - chl_map->ch[ch_no].start;
ch_no++;
ch++;
/*
* NOTE(review): the two statements below also run after the
* last chunk, so they write chl_map->ch[] one entry past it
* and read rs_length from the entry following the last chunk
* - confirm that entry is always valid to read.
*/
chl_map->ch[ch_no].start = sge_no;
ch_bytes = ntohl(ch->rc_target.rs_length);
/* If bytes remaining account for next chunk */
if (byte_count) {
head->arg.page_len += ch_bytes;
head->arg.len += ch_bytes;
head->arg.buflen += ch_bytes;
}
}
/*
* If this SGE consumed all of the page, move to the
* next page
*/
if ((sge_bytes + page_off) == PAGE_SIZE) {
page_no++;
page_off = 0;
/*
* If there are still bytes left to map, bump
* the page count
*/
if (byte_count)
head->count++;
} else
page_off += sge_bytes;
}
/* The loop above only exits once byte_count has reached 0. */
BUG_ON(byte_count != 0);
return sge_no;
}
/* Map a read-chunk-list to an XDR and fast register the page-list.
 *
 * Assumptions:
 * - chunk[0]	position points to pages[0] at an offset of 0
 * - pages[]	will be made physically contiguous by creating a one-off memory
 *		region using the fastreg verb.
 * - byte_count is # of bytes in read-chunk-list
 * - ch_count	is # of chunks in read-chunk-list
 *
 * Output:
 * - sge array pointing into pages[] array.
 * - chunk_sge array specifying sge index and count for each
 *   chunk in the read list
 *
 * Returns the number of chunks mapped (one SGE per chunk) on success,
 * -ENOMEM if no fast-register MR could be obtained, or -EIO if a page
 * could not be DMA-mapped or svc_rdma_fastreg() failed.  On the -EIO
 * paths the MR is handed back with svc_rdma_put_frmr().
 */
static int fast_reg_read_chunks(struct svcxprt_rdma *xprt,
				struct svc_rqst *rqstp,
				struct svc_rdma_op_ctxt *head,
				struct rpcrdma_msg *rmsgp,
				struct svc_rdma_req_map *rpl_map,
				struct svc_rdma_req_map *chl_map,
				int ch_count,
				int byte_count)
{
	int page_no;
	int ch_no;
	u32 offset;
	struct rpcrdma_read_chunk *ch;
	struct svc_rdma_fastreg_mr *frmr;
	int ret = 0;

	frmr = svc_rdma_get_frmr(xprt);
	if (IS_ERR(frmr))
		return -ENOMEM;

	head->frmr = frmr;
	head->arg.head[0] = rqstp->rq_arg.head[0];
	head->arg.tail[0] = rqstp->rq_arg.tail[0];
	head->arg.pages = &head->pages[head->count];
	head->hdr_count = head->count; /* save count of hdr pages */
	head->arg.page_base = 0;
	head->arg.page_len = byte_count;
	head->arg.len = rqstp->rq_arg.len + byte_count;
	head->arg.buflen = rqstp->rq_arg.buflen + byte_count;

	/* Fast register the page list */
	frmr->kva = page_address(rqstp->rq_arg.pages[0]);
	frmr->direction = DMA_FROM_DEVICE;
	frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
	frmr->map_len = byte_count;
	frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT;
	for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
		frmr->page_list->page_list[page_no] =
			ib_dma_map_page(xprt->sc_cm_id->device,
					rqstp->rq_arg.pages[page_no], 0,
					PAGE_SIZE, DMA_FROM_DEVICE);
		if (ib_dma_mapping_error(xprt->sc_cm_id->device,
					 frmr->page_list->page_list[page_no]))
			goto fatal_err;
		atomic_inc(&xprt->sc_dma_used);
		head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no];
	}
	head->count += page_no;

	/* rq_respages points one past arg pages */
	rqstp->rq_respages = &rqstp->rq_arg.pages[page_no];
	rqstp->rq_next_page = rqstp->rq_respages + 1;

	/* Create the reply and chunk maps */
	offset = 0;
	ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
	for (ch_no = 0; ch_no < ch_count; ch_no++) {
		int len = ntohl(ch->rc_target.rs_length);

		/* one SGE per chunk, laid out back to back from frmr->kva */
		rpl_map->sge[ch_no].iov_base = frmr->kva + offset;
		rpl_map->sge[ch_no].iov_len = len;
		chl_map->ch[ch_no].count = 1;
		chl_map->ch[ch_no].start = ch_no;
		offset += len;
		ch++;
	}

	ret = svc_rdma_fastreg(xprt, frmr);
	if (ret)
		goto fatal_err;

	return ch_no;

 fatal_err:
	/* explicit level and newline so the message is a complete log record */
	printk(KERN_ERR "svcrdma: error fast registering xdr for xprt %p\n",
	       xprt);
	svc_rdma_put_frmr(xprt, frmr);
	return -EIO;
}
/*
 * Fill ctxt->sge[0..count-1] from the kvec array @vec and mark the
 * context as DMA_FROM_DEVICE.
 *
 * With an @frmr, each SGE uses the kvec's address directly together with
 * the frmr's lkey.  Without one, the page behind each kvec is DMA-mapped
 * and the transport's sc_dma_lkey is used; every successful mapping
 * bumps xprt->sc_dma_used.
 *
 * *@sgl_offset is advanced by the length of every SGE filled in.
 * Returns 0 on success or -EINVAL when a DMA mapping fails, in which
 * case the failing SGE is left with length 0.
 */
static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
			     struct svc_rdma_op_ctxt *ctxt,
			     struct svc_rdma_fastreg_mr *frmr,
			     struct kvec *vec,
			     u64 *sgl_offset,
			     int count)
{
	int n;

	ctxt->count = count;
	ctxt->direction = DMA_FROM_DEVICE;

	for (n = 0; n < count; n++) {
		ctxt->sge[n].length = 0; /* in case map fails */

		if (frmr) {
			ctxt->sge[n].addr = (unsigned long)vec[n].iov_base;
			ctxt->sge[n].lkey = frmr->mr->lkey;
		} else {
			unsigned long in_page;

			BUG_ON(!virt_to_page(vec[n].iov_base));
			in_page = (unsigned long)vec[n].iov_base & ~PAGE_MASK;
			ctxt->sge[n].addr =
				ib_dma_map_page(xprt->sc_cm_id->device,
						virt_to_page(vec[n].iov_base),
						in_page,
						vec[n].iov_len,
						DMA_FROM_DEVICE);
			if (ib_dma_mapping_error(xprt->sc_cm_id->device,
						 ctxt->sge[n].addr))
				return -EINVAL;
			ctxt->sge[n].lkey = xprt->sc_dma_lkey;
			atomic_inc(&xprt->sc_dma_used);
		}

		ctxt->sge[n].length = vec[n].iov_len;
		*sgl_offset += vec[n].iov_len;
	}

	return 0;
}
static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
{ {
if ((rdma_node_get_transport(xprt->sc_cm_id->device->node_type) == if (rdma_node_get_transport(xprt->sc_cm_id->device->node_type) ==
RDMA_TRANSPORT_IWARP) && RDMA_TRANSPORT_IWARP)
sge_count > 1)
return 1; return 1;
else else
return min_t(int, sge_count, xprt->sc_max_sge); return min_t(int, sge_count, xprt->sc_max_sge);
} }
/* typedef int (*rdma_reader_fn)(struct svcxprt_rdma *xprt,
* Use RDMA_READ to read data from the advertised client buffer into the
* XDR stream starting at rq_arg.head[0].iov_base.
* Each chunk in the array
* contains the following fields:
* discrim - '1', This isn't used for data placement
* position - The xdr stream offset (the same for every chunk)
* handle - RMR for client memory region
* length - data transfer length
* offset - 64 bit tagged offset in remote memory region
*
* On our side, we need to read into a pagelist. The first page immediately
* follows the RPC header.
*
* This function returns:
* 0 - No error and no read-list found.
*
* 1 - Successful read-list processing. The data is not yet in
* the pagelist and therefore the RPC request must be deferred. The
* I/O completion will enqueue the transport again and
* svc_rdma_recvfrom will complete the request.
*
* <0 - Error processing/posting read-list.
*
* NOTE: The ctxt must not be touched after the last WR has been posted
* because the I/O completion processing may occur on another
* processor and free / modify the context. Ne touche pas!
*/
static int rdma_read_xdr(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rmsgp,
struct svc_rqst *rqstp, struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *hdr_ctxt) struct svc_rdma_op_ctxt *head,
int *page_no,
u32 *page_offset,
u32 rs_handle,
u32 rs_length,
u64 rs_offset,
int last);
/*
 * Issue an RDMA_READ using the local lkey to map the data sink.
 *
 * Starting at page *page_no, byte offset *page_offset of rqstp->rq_arg.pages,
 * DMA-map as many pages as one work request may carry (see
 * rdma_read_max_sge()), record each of them in head->arg, and post a single
 * RDMA_READ that pulls data from the remote segment (rs_handle, rs_offset)
 * into them.
 *
 * @last is non-zero when the segment belongs to the final chunk of the read
 * list.  The context is marked RDMACTXT_F_LAST_CTXT only when, in addition,
 * this call covers all of rs_length.
 *
 * Returns the number of bytes covered by the posted read and advances
 * *page_no / *page_offset past them; the caller is expected to call again
 * for whatever is left of the segment.  On failure a negative value is
 * returned, the context is released and *page_no / *page_offset are left
 * untouched.
 */
static int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
			       struct svc_rqst *rqstp,
			       struct svc_rdma_op_ctxt *head,
			       int *page_no,
			       u32 *page_offset,
			       u32 rs_handle,
			       u32 rs_length,
			       u64 rs_offset,
			       int last)
{
	struct ib_send_wr read_wr;
	int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
	struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
	int ret, read, pno;
	u32 pg_off = *page_offset;
	u32 pg_no = *page_no;

	ctxt->direction = DMA_FROM_DEVICE;
	ctxt->read_hdr = head;
	pages_needed =
		min_t(int, pages_needed, rdma_read_max_sge(xprt, pages_needed));

	/*
	 * The first page is only usable from *page_offset onwards, so the
	 * sink holds that many bytes fewer than pages_needed whole pages.
	 * Reporting more than was mapped would make the caller skip data.
	 */
	read = min_t(int, (pages_needed << PAGE_SHIFT) - *page_offset,
		     rs_length);

	for (pno = 0; pno < pages_needed; pno++) {
		int len = min_t(int, rs_length, PAGE_SIZE - pg_off);

		head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
		head->arg.page_len += len;
		head->arg.len += len;
		/* Count a page once, when it is first used */
		if (!pg_off)
			head->count++;
		rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
		rqstp->rq_next_page = rqstp->rq_respages + 1;
		ctxt->sge[pno].addr =
			ib_dma_map_page(xprt->sc_cm_id->device,
					head->arg.pages[pg_no], pg_off,
					PAGE_SIZE - pg_off,
					DMA_FROM_DEVICE);
		ret = ib_dma_mapping_error(xprt->sc_cm_id->device,
					   ctxt->sge[pno].addr);
		if (ret) {
			/*
			 * The caller tells success from failure by sign, so
			 * a positive "mapping failed" indication must not
			 * escape as a byte count.
			 */
			if (ret > 0)
				ret = -EINVAL;
			goto err;
		}
		atomic_inc(&xprt->sc_dma_used);

		/* The lkey here is either a local dma lkey or a dma_mr lkey */
		ctxt->sge[pno].lkey = xprt->sc_dma_lkey;
		ctxt->sge[pno].length = len;
		ctxt->count++;

		/* adjust offset and wrap to next page if needed */
		pg_off += len;
		if (pg_off == PAGE_SIZE) {
			pg_off = 0;
			pg_no++;
		}
		rs_length -= len;
	}

	if (last && rs_length == 0)
		set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
	else
		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);

	memset(&read_wr, 0, sizeof(read_wr));
	read_wr.wr_id = (unsigned long)ctxt;
	read_wr.opcode = IB_WR_RDMA_READ;
	ctxt->wr_op = read_wr.opcode;
	read_wr.send_flags = IB_SEND_SIGNALED;
	read_wr.wr.rdma.rkey = rs_handle;
	read_wr.wr.rdma.remote_addr = rs_offset;
	read_wr.sg_list = ctxt->sge;
	read_wr.num_sge = pages_needed;

	ret = svc_rdma_send(xprt, &read_wr);
	if (ret) {
		pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
		set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
		goto err;
	}

	/* return current location in page array */
	*page_no = pg_no;
	*page_offset = pg_off;
	ret = read;
	atomic_inc(&rdma_stat_read);
	return ret;
err:
	svc_rdma_unmap_dma(ctxt);
	svc_rdma_put_context(ctxt, 0);
	return ret;
}
/*
 * Issue an RDMA_READ using an FRMR to map the data sink.
 *
 * Starting at page *page_no, byte offset *page_offset of rqstp->rq_arg.pages,
 * DMA-map up to xprt->sc_frmr_pg_list_len whole pages into a fast-register
 * MR, record each page in head->arg, and post a chain of work requests:
 * FAST_REG_MR, then the RDMA_READ from the remote segment
 * (rs_handle, rs_offset), then - unless the device supports read-with-
 * invalidate - a fenced LOCAL_INV.
 *
 * @last is non-zero when the segment belongs to the final chunk of the read
 * list.  The context is marked RDMACTXT_F_LAST_CTXT only when, in addition,
 * this call covers all of rs_length.
 *
 * Returns the number of bytes covered by the posted read and advances
 * *page_no / *page_offset past them; the caller is expected to call again
 * for whatever is left of the segment.  Returns -ENOMEM when no FRMR is
 * available, or another negative value when mapping or posting fails; in
 * every failure case the context (and the FRMR, if one was obtained) is
 * released and *page_no / *page_offset are left untouched.
 */
static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
				struct svc_rqst *rqstp,
				struct svc_rdma_op_ctxt *head,
				int *page_no,
				u32 *page_offset,
				u32 rs_handle,
				u32 rs_length,
				u64 rs_offset,
				int last)
{
	struct ib_send_wr read_wr;
	struct ib_send_wr inv_wr;
	struct ib_send_wr fastreg_wr;
	u8 key;
	int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
	struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
	struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt);
	int ret, read, pno;
	u32 pg_off = *page_offset;
	u32 pg_no = *page_no;

	if (IS_ERR(frmr)) {
		/* Give back the context taken above; nothing else owns it */
		svc_rdma_put_context(ctxt, 0);
		return -ENOMEM;
	}

	ctxt->direction = DMA_FROM_DEVICE;
	ctxt->frmr = frmr;
	pages_needed = min_t(int, pages_needed, xprt->sc_frmr_pg_list_len);

	/*
	 * The SGE below starts *page_offset bytes into the registered
	 * region, so only map_len - *page_offset bytes of it can receive
	 * data.  Reporting more would make the caller skip data.
	 */
	read = min_t(int, (pages_needed << PAGE_SHIFT) - *page_offset,
		     rs_length);

	frmr->kva = page_address(rqstp->rq_arg.pages[pg_no]);
	frmr->direction = DMA_FROM_DEVICE;
	frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
	frmr->map_len = pages_needed << PAGE_SHIFT;
	frmr->page_list_len = pages_needed;

	for (pno = 0; pno < pages_needed; pno++) {
		int len = min_t(int, rs_length, PAGE_SIZE - pg_off);

		head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
		head->arg.page_len += len;
		head->arg.len += len;
		/* Count a page once, when it is first used */
		if (!pg_off)
			head->count++;
		rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
		rqstp->rq_next_page = rqstp->rq_respages + 1;
		frmr->page_list->page_list[pno] =
			ib_dma_map_page(xprt->sc_cm_id->device,
					head->arg.pages[pg_no], 0,
					PAGE_SIZE, DMA_FROM_DEVICE);
		ret = ib_dma_mapping_error(xprt->sc_cm_id->device,
					   frmr->page_list->page_list[pno]);
		if (ret) {
			/*
			 * The caller tells success from failure by sign, so
			 * a positive "mapping failed" indication must not
			 * escape as a byte count.
			 *
			 * NOTE(review): page_list_len already claims
			 * pages_needed entries although only pno are mapped;
			 * confirm svc_rdma_put_frmr() copes with that.
			 */
			if (ret > 0)
				ret = -EINVAL;
			goto err;
		}
		atomic_inc(&xprt->sc_dma_used);

		/* adjust offset and wrap to next page if needed */
		pg_off += len;
		if (pg_off == PAGE_SIZE) {
			pg_off = 0;
			pg_no++;
		}
		rs_length -= len;
	}

	if (last && rs_length == 0)
		set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
	else
		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);

	/* Bump the key */
	key = (u8)(frmr->mr->lkey & 0x000000FF);
	ib_update_fast_reg_key(frmr->mr, ++key);

	/* One SGE covers the whole registered region from the start offset */
	ctxt->sge[0].addr = (unsigned long)frmr->kva + *page_offset;
	ctxt->sge[0].lkey = frmr->mr->lkey;
	ctxt->sge[0].length = read;
	ctxt->count = 1;
	ctxt->read_hdr = head;

	/* Prepare FASTREG WR */
	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
	fastreg_wr.opcode = IB_WR_FAST_REG_MR;
	fastreg_wr.send_flags = IB_SEND_SIGNALED;
	fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva;
	fastreg_wr.wr.fast_reg.page_list = frmr->page_list;
	fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len;
	fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
	fastreg_wr.wr.fast_reg.length = frmr->map_len;
	fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags;
	fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey;
	fastreg_wr.next = &read_wr;

	/* Prepare RDMA_READ */
	memset(&read_wr, 0, sizeof(read_wr));
	read_wr.send_flags = IB_SEND_SIGNALED;
	read_wr.wr.rdma.rkey = rs_handle;
	read_wr.wr.rdma.remote_addr = rs_offset;
	read_wr.sg_list = ctxt->sge;
	read_wr.num_sge = 1;
	if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) {
		/* The read itself invalidates the MR and carries the ctxt */
		read_wr.opcode = IB_WR_RDMA_READ_WITH_INV;
		read_wr.wr_id = (unsigned long)ctxt;
		read_wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey;
	} else {
		read_wr.opcode = IB_WR_RDMA_READ;
		read_wr.next = &inv_wr;
		/* Prepare invalidate; here the LOCAL_INV WR carries the ctxt */
		memset(&inv_wr, 0, sizeof(inv_wr));
		inv_wr.wr_id = (unsigned long)ctxt;
		inv_wr.opcode = IB_WR_LOCAL_INV;
		inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE;
		inv_wr.ex.invalidate_rkey = frmr->mr->lkey;
	}
	ctxt->wr_op = read_wr.opcode;

	/* Post the chain */
	ret = svc_rdma_send(xprt, &fastreg_wr);
	if (ret) {
		pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
		set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
		goto err;
	}

	/* return current location in page array */
	*page_no = pg_no;
	*page_offset = pg_off;
	ret = read;
	atomic_inc(&rdma_stat_read);
	return ret;
err:
	svc_rdma_unmap_dma(ctxt);
	svc_rdma_put_context(ctxt, 0);
	svc_rdma_put_frmr(xprt, frmr);
	return ret;
}
static int rdma_read_chunks(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rmsgp,
struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *head)
{
int page_no, ch_count, ret;
struct rpcrdma_read_chunk *ch; struct rpcrdma_read_chunk *ch;
struct svc_rdma_op_ctxt *ctxt = NULL; u32 page_offset, byte_count;
struct svc_rdma_req_map *rpl_map; u64 rs_offset;
struct svc_rdma_req_map *chl_map; rdma_reader_fn reader;
/* If no read list is present, return 0 */ /* If no read list is present, return 0 */
ch = svc_rdma_get_read_chunk(rmsgp); ch = svc_rdma_get_read_chunk(rmsgp);
@ -408,122 +384,55 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
if (ch_count > RPCSVC_MAXPAGES) if (ch_count > RPCSVC_MAXPAGES)
return -EINVAL; return -EINVAL;
/* Allocate temporary reply and chunk maps */ /* The request is completed when the RDMA_READs complete. The
rpl_map = svc_rdma_get_req_map(); * head context keeps all the pages that comprise the
chl_map = svc_rdma_get_req_map(); * request.
*/
head->arg.head[0] = rqstp->rq_arg.head[0];
head->arg.tail[0] = rqstp->rq_arg.tail[0];
head->arg.pages = &head->pages[head->count];
head->hdr_count = head->count;
head->arg.page_base = 0;
head->arg.page_len = 0;
head->arg.len = rqstp->rq_arg.len;
head->arg.buflen = rqstp->rq_arg.buflen;
if (!xprt->sc_frmr_pg_list_len) /* Use FRMR if supported */
sge_count = map_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp, if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)
rpl_map, chl_map, ch_count, reader = rdma_read_chunk_frmr;
byte_count);
else else
sge_count = fast_reg_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp, reader = rdma_read_chunk_lcl;
rpl_map, chl_map, ch_count,
byte_count);
if (sge_count < 0) {
err = -EIO;
goto out;
}
sgl_offset = 0;
ch_no = 0;
page_no = 0; page_offset = 0;
for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
ch->rc_discrim != 0; ch++, ch_no++) { ch->rc_discrim != 0; ch++) {
u64 rs_offset;
next_sge:
ctxt = svc_rdma_get_context(xprt);
ctxt->direction = DMA_FROM_DEVICE;
ctxt->frmr = hdr_ctxt->frmr;
ctxt->read_hdr = NULL;
clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
/* Prepare READ WR */
memset(&read_wr, 0, sizeof read_wr);
read_wr.wr_id = (unsigned long)ctxt;
read_wr.opcode = IB_WR_RDMA_READ;
ctxt->wr_op = read_wr.opcode;
read_wr.send_flags = IB_SEND_SIGNALED;
read_wr.wr.rdma.rkey = ntohl(ch->rc_target.rs_handle);
xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset, xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset,
&rs_offset); &rs_offset);
read_wr.wr.rdma.remote_addr = rs_offset + sgl_offset; byte_count = ntohl(ch->rc_target.rs_length);
read_wr.sg_list = ctxt->sge;
read_wr.num_sge =
rdma_read_max_sge(xprt, chl_map->ch[ch_no].count);
err = rdma_set_ctxt_sge(xprt, ctxt, hdr_ctxt->frmr,
&rpl_map->sge[chl_map->ch[ch_no].start],
&sgl_offset,
read_wr.num_sge);
if (err) {
svc_rdma_unmap_dma(ctxt);
svc_rdma_put_context(ctxt, 0);
goto out;
}
if (((ch+1)->rc_discrim == 0) &&
(read_wr.num_sge == chl_map->ch[ch_no].count)) {
/*
* Mark the last RDMA_READ with a bit to
* indicate all RPC data has been fetched from
* the client and the RPC needs to be enqueued.
*/
set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
if (hdr_ctxt->frmr) {
set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
/*
* Invalidate the local MR used to map the data
* sink.
*/
if (xprt->sc_dev_caps &
SVCRDMA_DEVCAP_READ_W_INV) {
read_wr.opcode =
IB_WR_RDMA_READ_WITH_INV;
ctxt->wr_op = read_wr.opcode;
read_wr.ex.invalidate_rkey =
ctxt->frmr->mr->lkey;
} else {
/* Prepare INVALIDATE WR */
memset(&inv_wr, 0, sizeof inv_wr);
inv_wr.opcode = IB_WR_LOCAL_INV;
inv_wr.send_flags = IB_SEND_SIGNALED;
inv_wr.ex.invalidate_rkey =
hdr_ctxt->frmr->mr->lkey;
read_wr.next = &inv_wr;
}
}
ctxt->read_hdr = hdr_ctxt;
}
/* Post the read */
err = svc_rdma_send(xprt, &read_wr);
if (err) {
printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n",
err);
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
svc_rdma_unmap_dma(ctxt);
svc_rdma_put_context(ctxt, 0);
goto out;
}
atomic_inc(&rdma_stat_read);
if (read_wr.num_sge < chl_map->ch[ch_no].count) { while (byte_count > 0) {
chl_map->ch[ch_no].count -= read_wr.num_sge; ret = reader(xprt, rqstp, head,
chl_map->ch[ch_no].start += read_wr.num_sge; &page_no, &page_offset,
goto next_sge; ntohl(ch->rc_target.rs_handle),
byte_count, rs_offset,
((ch+1)->rc_discrim == 0) /* last */
);
if (ret < 0)
goto err;
byte_count -= ret;
rs_offset += ret;
head->arg.buflen += ret;
} }
sgl_offset = 0;
err = 1;
} }
ret = 1;
out: err:
svc_rdma_put_req_map(rpl_map);
svc_rdma_put_req_map(chl_map);
/* Detach arg pages. svc_recv will replenish them */ /* Detach arg pages. svc_recv will replenish them */
for (ch_no = 0; &rqstp->rq_pages[ch_no] < rqstp->rq_respages; ch_no++) for (page_no = 0;
rqstp->rq_pages[ch_no] = NULL; &rqstp->rq_pages[page_no] < rqstp->rq_respages; page_no++)
rqstp->rq_pages[page_no] = NULL;
return err; return ret;
} }
static int rdma_read_complete(struct svc_rqst *rqstp, static int rdma_read_complete(struct svc_rqst *rqstp,
@ -595,13 +504,9 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
struct svc_rdma_op_ctxt, struct svc_rdma_op_ctxt,
dto_q); dto_q);
list_del_init(&ctxt->dto_q); list_del_init(&ctxt->dto_q);
}
if (ctxt) {
spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock); spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock);
return rdma_read_complete(rqstp, ctxt); return rdma_read_complete(rqstp, ctxt);
} } else if (!list_empty(&rdma_xprt->sc_rq_dto_q)) {
if (!list_empty(&rdma_xprt->sc_rq_dto_q)) {
ctxt = list_entry(rdma_xprt->sc_rq_dto_q.next, ctxt = list_entry(rdma_xprt->sc_rq_dto_q.next,
struct svc_rdma_op_ctxt, struct svc_rdma_op_ctxt,
dto_q); dto_q);
@ -621,7 +526,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) if (test_bit(XPT_CLOSE, &xprt->xpt_flags))
goto close_out; goto close_out;
BUG_ON(ret);
goto out; goto out;
} }
dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n", dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n",
@ -644,12 +548,11 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
} }
/* Read read-list data. */ /* Read read-list data. */
ret = rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt); ret = rdma_read_chunks(rdma_xprt, rmsgp, rqstp, ctxt);
if (ret > 0) { if (ret > 0) {
/* read-list posted, defer until data received from client. */ /* read-list posted, defer until data received from client. */
goto defer; goto defer;
} } else if (ret < 0) {
if (ret < 0) {
/* Post of read-list failed, free context. */ /* Post of read-list failed, free context. */
svc_rdma_put_context(ctxt, 1); svc_rdma_put_context(ctxt, 1);
return 0; return 0;

View File

@ -1,4 +1,5 @@
/* /*
* Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
* Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
* *
* This software is available to you under a choice of one of two * This software is available to you under a choice of one of two
@ -49,152 +50,6 @@
#define RPCDBG_FACILITY RPCDBG_SVCXPRT #define RPCDBG_FACILITY RPCDBG_SVCXPRT
/* Encode an XDR as an array of IB SGE, backed by a fast-register MR
 *
 * Assumptions:
 * - head[0] is physically contiguous.
 * - tail[0] is physically contiguous.
 * - pages[] is not physically or virtually contiguous and consists of
 *   PAGE_SIZE elements.
 *
 * Output:
 *   SGE[0]              reserved for RPCRDMA header
 *   SGE[1]              data from xdr->head[]
 *   SGE[2..sge_count-2] data from xdr->pages[]
 *   SGE[sge_count-1]    data from xdr->tail.
 *
 * The max SGE we need is the length of the XDR / pagesize + one for
 * head + one for tail + one for RPCRDMA header. Since RPCSVC_MAXPAGES
 * reserves a page for both the request and the reply header, and this
 * array is only concerned with the reply we are assured that we have
 * one extra page for the RPCRDMA header.
 *
 * The FRMR page list is filled in the same order: slot 0 is the head
 * page, the following slots are the xdr pages, and - when the tail does
 * not share the head's page - the next free slot is the tail page.
 *
 * Returns 0 on success, -ENOMEM if no FRMR could be obtained, and -EIO
 * if a DMA mapping or the fast registration fails (in which case the
 * FRMR is released and vec->frmr is reset to NULL).
 */
static int fast_reg_xdr(struct svcxprt_rdma *xprt,
			struct xdr_buf *xdr,
			struct svc_rdma_req_map *vec)
{
	int sge_no;
	u32 sge_bytes;
	u32 page_bytes;
	u32 page_off;
	int page_no = 0;
	u8 *frva;
	struct svc_rdma_fastreg_mr *frmr;

	frmr = svc_rdma_get_frmr(xprt);
	if (IS_ERR(frmr))
		return -ENOMEM;
	vec->frmr = frmr;

	/* Skip the RPCRDMA header */
	sge_no = 1;

	/* Map the head. */
	frva = (void *)((unsigned long)(xdr->head[0].iov_base) & PAGE_MASK);
	vec->sge[sge_no].iov_base = xdr->head[0].iov_base;
	vec->sge[sge_no].iov_len = xdr->head[0].iov_len;
	vec->count = 2;
	sge_no++;

	/* Map the XDR head */
	frmr->kva = frva;
	frmr->direction = DMA_TO_DEVICE;
	frmr->access_flags = 0;
	frmr->map_len = PAGE_SIZE;
	frmr->page_list_len = 1;
	page_off = (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK;
	frmr->page_list->page_list[page_no] =
		ib_dma_map_page(xprt->sc_cm_id->device,
				virt_to_page(xdr->head[0].iov_base),
				page_off,
				PAGE_SIZE - page_off,
				DMA_TO_DEVICE);
	if (ib_dma_mapping_error(xprt->sc_cm_id->device,
				 frmr->page_list->page_list[page_no]))
		goto fatal_err;
	atomic_inc(&xprt->sc_dma_used);

	/* Map the XDR page list */
	page_off = xdr->page_base;
	page_bytes = xdr->page_len + page_off;
	if (!page_bytes)
		goto encode_tail;

	/* Map the pages */
	vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
	vec->sge[sge_no].iov_len = page_bytes;
	sge_no++;
	while (page_bytes) {
		struct page *page;

		/* xdr page N lands in page-list slot N + 1 (slot 0 = head) */
		page = xdr->pages[page_no++];
		sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off));
		page_bytes -= sge_bytes;

		frmr->page_list->page_list[page_no] =
			ib_dma_map_page(xprt->sc_cm_id->device,
					page, page_off,
					sge_bytes, DMA_TO_DEVICE);
		if (ib_dma_mapping_error(xprt->sc_cm_id->device,
					 frmr->page_list->page_list[page_no]))
			goto fatal_err;

		atomic_inc(&xprt->sc_dma_used);
		page_off = 0; /* reset for next time through loop */
		frmr->map_len += PAGE_SIZE;
		frmr->page_list_len++;
	}
	vec->count++;

 encode_tail:
	/* Map tail */
	if (0 == xdr->tail[0].iov_len)
		goto done;

	vec->count++;
	vec->sge[sge_no].iov_len = xdr->tail[0].iov_len;

	if (((unsigned long)xdr->tail[0].iov_base & PAGE_MASK) ==
	    ((unsigned long)xdr->head[0].iov_base & PAGE_MASK)) {
		/*
		 * If head and tail use the same page, we don't need
		 * to map it again.
		 */
		vec->sge[sge_no].iov_base = xdr->tail[0].iov_base;
	} else {
		void *va;

		/* Map another page for the tail */
		page_off = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK;
		va = (void *)((unsigned long)xdr->tail[0].iov_base & PAGE_MASK);
		vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;

		/*
		 * Use the first unused page-list slot.  page_no still names
		 * the slot of the last page mapped above (or the head's slot
		 * when there were no pages), so reusing it would overwrite
		 * that entry; page_list_len is the count of slots in use and
		 * matches the map_len used for iov_base just above.
		 */
		page_no = frmr->page_list_len;
		frmr->page_list->page_list[page_no] =
			ib_dma_map_page(xprt->sc_cm_id->device,
					virt_to_page(va),
					page_off,
					/* stay inside the page, as for the head */
					PAGE_SIZE - page_off,
					DMA_TO_DEVICE);
		if (ib_dma_mapping_error(xprt->sc_cm_id->device,
					 frmr->page_list->page_list[page_no]))
			goto fatal_err;
		atomic_inc(&xprt->sc_dma_used);
		frmr->map_len += PAGE_SIZE;
		frmr->page_list_len++;
	}

 done:
	if (svc_rdma_fastreg(xprt, frmr))
		goto fatal_err;

	return 0;

 fatal_err:
	printk("svcrdma: Error fast registering memory for xprt %p\n", xprt);
	vec->frmr = NULL;
	svc_rdma_put_frmr(xprt, frmr);
	return -EIO;
}
static int map_xdr(struct svcxprt_rdma *xprt, static int map_xdr(struct svcxprt_rdma *xprt,
struct xdr_buf *xdr, struct xdr_buf *xdr,
struct svc_rdma_req_map *vec) struct svc_rdma_req_map *vec)
@ -208,9 +63,6 @@ static int map_xdr(struct svcxprt_rdma *xprt,
BUG_ON(xdr->len != BUG_ON(xdr->len !=
(xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)); (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len));
if (xprt->sc_frmr_pg_list_len)
return fast_reg_xdr(xprt, xdr, vec);
/* Skip the first sge, this is for the RPCRDMA header */ /* Skip the first sge, this is for the RPCRDMA header */
sge_no = 1; sge_no = 1;
@ -282,8 +134,6 @@ static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt,
} }
/* Assumptions: /* Assumptions:
* - We are using FRMR
* - or -
* - The specified write_len can be represented in sc_max_sge * PAGE_SIZE * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
*/ */
static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
@ -327,7 +177,6 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
sge_bytes = min_t(size_t, sge_bytes = min_t(size_t,
bc, vec->sge[xdr_sge_no].iov_len-sge_off); bc, vec->sge[xdr_sge_no].iov_len-sge_off);
sge[sge_no].length = sge_bytes; sge[sge_no].length = sge_bytes;
if (!vec->frmr) {
sge[sge_no].addr = sge[sge_no].addr =
dma_map_xdr(xprt, &rqstp->rq_res, xdr_off, dma_map_xdr(xprt, &rqstp->rq_res, xdr_off,
sge_bytes, DMA_TO_DEVICE); sge_bytes, DMA_TO_DEVICE);
@ -337,13 +186,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
goto err; goto err;
atomic_inc(&xprt->sc_dma_used); atomic_inc(&xprt->sc_dma_used);
sge[sge_no].lkey = xprt->sc_dma_lkey; sge[sge_no].lkey = xprt->sc_dma_lkey;
} else {
sge[sge_no].addr = (unsigned long)
vec->sge[xdr_sge_no].iov_base + sge_off;
sge[sge_no].lkey = vec->frmr->mr->lkey;
}
ctxt->count++; ctxt->count++;
ctxt->frmr = vec->frmr;
sge_off = 0; sge_off = 0;
sge_no++; sge_no++;
xdr_sge_no++; xdr_sge_no++;
@ -369,7 +212,6 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
return 0; return 0;
err: err:
svc_rdma_unmap_dma(ctxt); svc_rdma_unmap_dma(ctxt);
svc_rdma_put_frmr(xprt, vec->frmr);
svc_rdma_put_context(ctxt, 0); svc_rdma_put_context(ctxt, 0);
/* Fatal error, close transport */ /* Fatal error, close transport */
return -EIO; return -EIO;
@ -397,9 +239,6 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
res_ary = (struct rpcrdma_write_array *) res_ary = (struct rpcrdma_write_array *)
&rdma_resp->rm_body.rm_chunks[1]; &rdma_resp->rm_body.rm_chunks[1];
if (vec->frmr)
max_write = vec->frmr->map_len;
else
max_write = xprt->sc_max_sge * PAGE_SIZE; max_write = xprt->sc_max_sge * PAGE_SIZE;
/* Write chunks start at the pagelist */ /* Write chunks start at the pagelist */
@ -472,9 +311,6 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
res_ary = (struct rpcrdma_write_array *) res_ary = (struct rpcrdma_write_array *)
&rdma_resp->rm_body.rm_chunks[2]; &rdma_resp->rm_body.rm_chunks[2];
if (vec->frmr)
max_write = vec->frmr->map_len;
else
max_write = xprt->sc_max_sge * PAGE_SIZE; max_write = xprt->sc_max_sge * PAGE_SIZE;
/* xdr offset starts at RPC message */ /* xdr offset starts at RPC message */
@ -545,7 +381,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
int byte_count) int byte_count)
{ {
struct ib_send_wr send_wr; struct ib_send_wr send_wr;
struct ib_send_wr inv_wr;
int sge_no; int sge_no;
int sge_bytes; int sge_bytes;
int page_no; int page_no;
@ -559,7 +394,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
"svcrdma: could not post a receive buffer, err=%d." "svcrdma: could not post a receive buffer, err=%d."
"Closing transport %p.\n", ret, rdma); "Closing transport %p.\n", ret, rdma);
set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
svc_rdma_put_frmr(rdma, vec->frmr);
svc_rdma_put_context(ctxt, 0); svc_rdma_put_context(ctxt, 0);
return -ENOTCONN; return -ENOTCONN;
} }
@ -567,11 +401,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
/* Prepare the context */ /* Prepare the context */
ctxt->pages[0] = page; ctxt->pages[0] = page;
ctxt->count = 1; ctxt->count = 1;
ctxt->frmr = vec->frmr;
if (vec->frmr)
set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
else
clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
/* Prepare the SGE for the RPCRDMA Header */ /* Prepare the SGE for the RPCRDMA Header */
ctxt->sge[0].lkey = rdma->sc_dma_lkey; ctxt->sge[0].lkey = rdma->sc_dma_lkey;
@ -590,7 +419,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
int xdr_off = 0; int xdr_off = 0;
sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count); sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
byte_count -= sge_bytes; byte_count -= sge_bytes;
if (!vec->frmr) {
ctxt->sge[sge_no].addr = ctxt->sge[sge_no].addr =
dma_map_xdr(rdma, &rqstp->rq_res, xdr_off, dma_map_xdr(rdma, &rqstp->rq_res, xdr_off,
sge_bytes, DMA_TO_DEVICE); sge_bytes, DMA_TO_DEVICE);
@ -600,11 +428,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
goto err; goto err;
atomic_inc(&rdma->sc_dma_used); atomic_inc(&rdma->sc_dma_used);
ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey; ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
} else {
ctxt->sge[sge_no].addr = (unsigned long)
vec->sge[sge_no].iov_base;
ctxt->sge[sge_no].lkey = vec->frmr->mr->lkey;
}
ctxt->sge[sge_no].length = sge_bytes; ctxt->sge[sge_no].length = sge_bytes;
} }
BUG_ON(byte_count != 0); BUG_ON(byte_count != 0);
@ -627,6 +450,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
ctxt->sge[page_no+1].length = 0; ctxt->sge[page_no+1].length = 0;
} }
rqstp->rq_next_page = rqstp->rq_respages + 1; rqstp->rq_next_page = rqstp->rq_respages + 1;
BUG_ON(sge_no > rdma->sc_max_sge); BUG_ON(sge_no > rdma->sc_max_sge);
memset(&send_wr, 0, sizeof send_wr); memset(&send_wr, 0, sizeof send_wr);
ctxt->wr_op = IB_WR_SEND; ctxt->wr_op = IB_WR_SEND;
@ -635,15 +459,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
send_wr.num_sge = sge_no; send_wr.num_sge = sge_no;
send_wr.opcode = IB_WR_SEND; send_wr.opcode = IB_WR_SEND;
send_wr.send_flags = IB_SEND_SIGNALED; send_wr.send_flags = IB_SEND_SIGNALED;
if (vec->frmr) {
/* Prepare INVALIDATE WR */
memset(&inv_wr, 0, sizeof inv_wr);
inv_wr.opcode = IB_WR_LOCAL_INV;
inv_wr.send_flags = IB_SEND_SIGNALED;
inv_wr.ex.invalidate_rkey =
vec->frmr->mr->lkey;
send_wr.next = &inv_wr;
}
ret = svc_rdma_send(rdma, &send_wr); ret = svc_rdma_send(rdma, &send_wr);
if (ret) if (ret)
@ -653,7 +468,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
err: err:
svc_rdma_unmap_dma(ctxt); svc_rdma_unmap_dma(ctxt);
svc_rdma_put_frmr(rdma, vec->frmr);
svc_rdma_put_context(ctxt, 1); svc_rdma_put_context(ctxt, 1);
return -EIO; return -EIO;
} }

View File

@ -1,4 +1,5 @@
/* /*
* Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
* Copyright (c) 2005-2007 Network Appliance, Inc. All rights reserved. * Copyright (c) 2005-2007 Network Appliance, Inc. All rights reserved.
* *
* This software is available to you under a choice of one of two * This software is available to you under a choice of one of two
@ -65,6 +66,7 @@ static void dto_tasklet_func(unsigned long data);
static void svc_rdma_detach(struct svc_xprt *xprt); static void svc_rdma_detach(struct svc_xprt *xprt);
static void svc_rdma_free(struct svc_xprt *xprt); static void svc_rdma_free(struct svc_xprt *xprt);
static int svc_rdma_has_wspace(struct svc_xprt *xprt); static int svc_rdma_has_wspace(struct svc_xprt *xprt);
static int svc_rdma_secure_port(struct svc_rqst *);
static void rq_cq_reap(struct svcxprt_rdma *xprt); static void rq_cq_reap(struct svcxprt_rdma *xprt);
static void sq_cq_reap(struct svcxprt_rdma *xprt); static void sq_cq_reap(struct svcxprt_rdma *xprt);
@ -82,6 +84,7 @@ static struct svc_xprt_ops svc_rdma_ops = {
.xpo_prep_reply_hdr = svc_rdma_prep_reply_hdr, .xpo_prep_reply_hdr = svc_rdma_prep_reply_hdr,
.xpo_has_wspace = svc_rdma_has_wspace, .xpo_has_wspace = svc_rdma_has_wspace,
.xpo_accept = svc_rdma_accept, .xpo_accept = svc_rdma_accept,
.xpo_secure_port = svc_rdma_secure_port,
}; };
struct svc_xprt_class svc_rdma_class = { struct svc_xprt_class svc_rdma_class = {
@ -160,7 +163,6 @@ struct svc_rdma_req_map *svc_rdma_get_req_map(void)
schedule_timeout_uninterruptible(msecs_to_jiffies(500)); schedule_timeout_uninterruptible(msecs_to_jiffies(500));
} }
map->count = 0; map->count = 0;
map->frmr = NULL;
return map; return map;
} }
@ -336,22 +338,21 @@ static void process_context(struct svcxprt_rdma *xprt,
switch (ctxt->wr_op) { switch (ctxt->wr_op) {
case IB_WR_SEND: case IB_WR_SEND:
if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags)) BUG_ON(ctxt->frmr);
svc_rdma_put_frmr(xprt, ctxt->frmr);
svc_rdma_put_context(ctxt, 1); svc_rdma_put_context(ctxt, 1);
break; break;
case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE:
BUG_ON(ctxt->frmr);
svc_rdma_put_context(ctxt, 0); svc_rdma_put_context(ctxt, 0);
break; break;
case IB_WR_RDMA_READ: case IB_WR_RDMA_READ:
case IB_WR_RDMA_READ_WITH_INV: case IB_WR_RDMA_READ_WITH_INV:
svc_rdma_put_frmr(xprt, ctxt->frmr);
if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr; struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
BUG_ON(!read_hdr); BUG_ON(!read_hdr);
if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
svc_rdma_put_frmr(xprt, ctxt->frmr);
spin_lock_bh(&xprt->sc_rq_dto_lock); spin_lock_bh(&xprt->sc_rq_dto_lock);
set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
list_add_tail(&read_hdr->dto_q, list_add_tail(&read_hdr->dto_q,
@ -363,6 +364,7 @@ static void process_context(struct svcxprt_rdma *xprt,
break; break;
default: default:
BUG_ON(1);
printk(KERN_ERR "svcrdma: unexpected completion type, " printk(KERN_ERR "svcrdma: unexpected completion type, "
"opcode=%d\n", "opcode=%d\n",
ctxt->wr_op); ctxt->wr_op);
@ -378,30 +380,43 @@ static void process_context(struct svcxprt_rdma *xprt,
static void sq_cq_reap(struct svcxprt_rdma *xprt) static void sq_cq_reap(struct svcxprt_rdma *xprt)
{ {
struct svc_rdma_op_ctxt *ctxt = NULL; struct svc_rdma_op_ctxt *ctxt = NULL;
struct ib_wc wc; struct ib_wc wc_a[6];
struct ib_wc *wc;
struct ib_cq *cq = xprt->sc_sq_cq; struct ib_cq *cq = xprt->sc_sq_cq;
int ret; int ret;
memset(wc_a, 0, sizeof(wc_a));
if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags)) if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags))
return; return;
ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP); ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
atomic_inc(&rdma_stat_sq_poll); atomic_inc(&rdma_stat_sq_poll);
while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) { while ((ret = ib_poll_cq(cq, ARRAY_SIZE(wc_a), wc_a)) > 0) {
if (wc.status != IB_WC_SUCCESS) int i;
for (i = 0; i < ret; i++) {
wc = &wc_a[i];
if (wc->status != IB_WC_SUCCESS) {
dprintk("svcrdma: sq wc err status %d\n",
wc->status);
/* Close the transport */ /* Close the transport */
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
}
/* Decrement used SQ WR count */ /* Decrement used SQ WR count */
atomic_dec(&xprt->sc_sq_count); atomic_dec(&xprt->sc_sq_count);
wake_up(&xprt->sc_send_wait); wake_up(&xprt->sc_send_wait);
ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; ctxt = (struct svc_rdma_op_ctxt *)
(unsigned long)wc->wr_id;
if (ctxt) if (ctxt)
process_context(xprt, ctxt); process_context(xprt, ctxt);
svc_xprt_put(&xprt->sc_xprt); svc_xprt_put(&xprt->sc_xprt);
} }
}
if (ctxt) if (ctxt)
atomic_inc(&rdma_stat_sq_prod); atomic_inc(&rdma_stat_sq_prod);
@ -993,7 +1008,11 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
need_dma_mr = 0; need_dma_mr = 0;
break; break;
case RDMA_TRANSPORT_IB: case RDMA_TRANSPORT_IB:
if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) { if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) {
need_dma_mr = 1;
dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
} else if (!(devattr.device_cap_flags &
IB_DEVICE_LOCAL_DMA_LKEY)) {
need_dma_mr = 1; need_dma_mr = 1;
dma_mr_acc = IB_ACCESS_LOCAL_WRITE; dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
} else } else
@ -1190,14 +1209,7 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
container_of(xprt, struct svcxprt_rdma, sc_xprt); container_of(xprt, struct svcxprt_rdma, sc_xprt);
/* /*
* If there are fewer SQ WR available than required to send a * If there are already waiters on the SQ,
* simple response, return false.
*/
if ((rdma->sc_sq_depth - atomic_read(&rdma->sc_sq_count) < 3))
return 0;
/*
* ...or there are already waiters on the SQ,
* return false. * return false.
*/ */
if (waitqueue_active(&rdma->sc_send_wait)) if (waitqueue_active(&rdma->sc_send_wait))
@ -1207,6 +1219,11 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
return 1; return 1;
} }
/*
 * Installed as the .xpo_secure_port method of svc_rdma_ops.
 *
 * Returns 1 unconditionally; rqstp is not examined.
 */
static int svc_rdma_secure_port(struct svc_rqst *rqstp)
{
return 1;
}
/* /*
* Attempt to register the kvec representing the RPC memory with the * Attempt to register the kvec representing the RPC memory with the
* device. * device.