Merge branch 'for-linus' of git://linux-nfs.org/~bfields/linux

* 'for-linus' of git://linux-nfs.org/~bfields/linux: (100 commits)
  SUNRPC: RPC program information is stored in unsigned integers
  SUNRPC: Move exported symbol definitions after function declaration part 2
  NLM: tear down RPC clients in nlm_shutdown_hosts
  SUNRPC: spin svc_rqst initialization to its own function
  nfsd: more careful input validation in nfsctl write methods
  lockd: minor log message fix
  knfsd: don't bother mapping putrootfh enoent to eperm
  rdma: makefile
  rdma: ONCRPC RDMA protocol marshalling
  rdma: SVCRDMA sendto
  rdma: SVCRDMA recvfrom
  rdma: SVCRDMA Core Transport Services
  rdma: SVCRDMA Transport Module
  rdma: SVCRDMA Header File
  svc: Add svc_xprt_names service to replace svc_sock_names
  knfsd: Support adding transports by writing portlist file
  svc: Add svc API that queries for a transport instance
  svc: Add /proc/sys/sunrpc/transport files
  svc: Add transport hdr size for defer/revisit
  svc: Move the xprt independent code to the svc_xprt.c file
  ...
This commit is contained in:
Linus Torvalds 2008-02-02 14:31:28 +11:00
commit 63e9b66e29
60 changed files with 5449 additions and 1663 deletions

View File

@ -2247,7 +2247,7 @@ P: J. Bruce Fields
M: bfields@fieldses.org M: bfields@fieldses.org
P: Neil Brown P: Neil Brown
M: neilb@suse.de M: neilb@suse.de
L: nfs@lists.sourceforge.net L: linux-nfs@vger.kernel.org
W: http://nfs.sourceforge.net/ W: http://nfs.sourceforge.net/
S: Supported S: Supported

View File

@ -1674,6 +1674,8 @@ config NFSD
select CRYPTO_MD5 if NFSD_V4 select CRYPTO_MD5 if NFSD_V4
select CRYPTO if NFSD_V4 select CRYPTO if NFSD_V4
select FS_POSIX_ACL if NFSD_V4 select FS_POSIX_ACL if NFSD_V4
select PROC_FS if NFSD_V4
select PROC_FS if SUNRPC_GSS
help help
If you want your Linux box to act as an NFS *server*, so that other If you want your Linux box to act as an NFS *server*, so that other
computers on your local network which support NFS can access certain computers on your local network which support NFS can access certain

View File

@ -34,10 +34,10 @@ static DEFINE_MUTEX(nlm_host_mutex);
static void nlm_gc_hosts(void); static void nlm_gc_hosts(void);
static struct nsm_handle * __nsm_find(const struct sockaddr_in *, static struct nsm_handle * __nsm_find(const struct sockaddr_in *,
const char *, int, int); const char *, unsigned int, int);
static struct nsm_handle * nsm_find(const struct sockaddr_in *sin, static struct nsm_handle * nsm_find(const struct sockaddr_in *sin,
const char *hostname, const char *hostname,
int hostname_len); unsigned int hostname_len);
/* /*
* Common host lookup routine for server & client * Common host lookup routine for server & client
@ -45,7 +45,8 @@ static struct nsm_handle * nsm_find(const struct sockaddr_in *sin,
static struct nlm_host * static struct nlm_host *
nlm_lookup_host(int server, const struct sockaddr_in *sin, nlm_lookup_host(int server, const struct sockaddr_in *sin,
int proto, int version, const char *hostname, int proto, int version, const char *hostname,
int hostname_len, const struct sockaddr_in *ssin) unsigned int hostname_len,
const struct sockaddr_in *ssin)
{ {
struct hlist_head *chain; struct hlist_head *chain;
struct hlist_node *pos; struct hlist_node *pos;
@ -176,7 +177,7 @@ nlm_destroy_host(struct nlm_host *host)
*/ */
struct nlm_host * struct nlm_host *
nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version, nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version,
const char *hostname, int hostname_len) const char *hostname, unsigned int hostname_len)
{ {
struct sockaddr_in ssin = {0}; struct sockaddr_in ssin = {0};
@ -189,7 +190,7 @@ nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version,
*/ */
struct nlm_host * struct nlm_host *
nlmsvc_lookup_host(struct svc_rqst *rqstp, nlmsvc_lookup_host(struct svc_rqst *rqstp,
const char *hostname, int hostname_len) const char *hostname, unsigned int hostname_len)
{ {
struct sockaddr_in ssin = {0}; struct sockaddr_in ssin = {0};
@ -307,7 +308,8 @@ void nlm_release_host(struct nlm_host *host)
* Release all resources held by that peer. * Release all resources held by that peer.
*/ */
void nlm_host_rebooted(const struct sockaddr_in *sin, void nlm_host_rebooted(const struct sockaddr_in *sin,
const char *hostname, int hostname_len, const char *hostname,
unsigned int hostname_len,
u32 new_state) u32 new_state)
{ {
struct hlist_head *chain; struct hlist_head *chain;
@ -377,8 +379,13 @@ nlm_shutdown_hosts(void)
/* First, make all hosts eligible for gc */ /* First, make all hosts eligible for gc */
dprintk("lockd: nuking all hosts...\n"); dprintk("lockd: nuking all hosts...\n");
for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) { for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) {
hlist_for_each_entry(host, pos, chain, h_hash) hlist_for_each_entry(host, pos, chain, h_hash) {
host->h_expires = jiffies - 1; host->h_expires = jiffies - 1;
if (host->h_rpcclnt) {
rpc_shutdown_client(host->h_rpcclnt);
host->h_rpcclnt = NULL;
}
}
} }
/* Then, perform a garbage collection pass */ /* Then, perform a garbage collection pass */
@ -449,7 +456,7 @@ static DEFINE_MUTEX(nsm_mutex);
static struct nsm_handle * static struct nsm_handle *
__nsm_find(const struct sockaddr_in *sin, __nsm_find(const struct sockaddr_in *sin,
const char *hostname, int hostname_len, const char *hostname, unsigned int hostname_len,
int create) int create)
{ {
struct nsm_handle *nsm = NULL; struct nsm_handle *nsm = NULL;
@ -503,7 +510,8 @@ out:
} }
static struct nsm_handle * static struct nsm_handle *
nsm_find(const struct sockaddr_in *sin, const char *hostname, int hostname_len) nsm_find(const struct sockaddr_in *sin, const char *hostname,
unsigned int hostname_len)
{ {
return __nsm_find(sin, hostname, hostname_len, 1); return __nsm_find(sin, hostname, hostname_len, 1);
} }

View File

@ -219,19 +219,6 @@ lockd(struct svc_rqst *rqstp)
module_put_and_exit(0); module_put_and_exit(0);
} }
static int find_socket(struct svc_serv *serv, int proto)
{
struct svc_sock *svsk;
int found = 0;
list_for_each_entry(svsk, &serv->sv_permsocks, sk_list)
if (svsk->sk_sk->sk_protocol == proto) {
found = 1;
break;
}
return found;
}
/* /*
* Make any sockets that are needed but not present. * Make any sockets that are needed but not present.
* If nlm_udpport or nlm_tcpport were set as module * If nlm_udpport or nlm_tcpport were set as module
@ -240,17 +227,25 @@ static int find_socket(struct svc_serv *serv, int proto)
static int make_socks(struct svc_serv *serv, int proto) static int make_socks(struct svc_serv *serv, int proto)
{ {
static int warned; static int warned;
struct svc_xprt *xprt;
int err = 0; int err = 0;
if (proto == IPPROTO_UDP || nlm_udpport) if (proto == IPPROTO_UDP || nlm_udpport) {
if (!find_socket(serv, IPPROTO_UDP)) xprt = svc_find_xprt(serv, "udp", 0, 0);
err = svc_makesock(serv, IPPROTO_UDP, nlm_udpport, if (!xprt)
SVC_SOCK_DEFAULTS); err = svc_create_xprt(serv, "udp", nlm_udpport,
if (err >= 0 && (proto == IPPROTO_TCP || nlm_tcpport)) SVC_SOCK_DEFAULTS);
if (!find_socket(serv, IPPROTO_TCP)) else
err = svc_makesock(serv, IPPROTO_TCP, nlm_tcpport, svc_xprt_put(xprt);
SVC_SOCK_DEFAULTS); }
if (err >= 0 && (proto == IPPROTO_TCP || nlm_tcpport)) {
xprt = svc_find_xprt(serv, "tcp", 0, 0);
if (!xprt)
err = svc_create_xprt(serv, "tcp", nlm_tcpport,
SVC_SOCK_DEFAULTS);
else
svc_xprt_put(xprt);
}
if (err >= 0) { if (err >= 0) {
warned = 0; warned = 0;
err = 0; err = 0;

View File

@ -84,6 +84,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
{ {
struct nlm_host *host; struct nlm_host *host;
struct nlm_file *file; struct nlm_file *file;
int rc = rpc_success;
dprintk("lockd: TEST4 called\n"); dprintk("lockd: TEST4 called\n");
resp->cookie = argp->cookie; resp->cookie = argp->cookie;
@ -91,7 +92,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
/* Don't accept test requests during grace period */ /* Don't accept test requests during grace period */
if (nlmsvc_grace_period) { if (nlmsvc_grace_period) {
resp->status = nlm_lck_denied_grace_period; resp->status = nlm_lck_denied_grace_period;
return rpc_success; return rc;
} }
/* Obtain client and file */ /* Obtain client and file */
@ -101,12 +102,13 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
/* Now check for conflicting locks */ /* Now check for conflicting locks */
resp->status = nlmsvc_testlock(rqstp, file, &argp->lock, &resp->lock, &resp->cookie); resp->status = nlmsvc_testlock(rqstp, file, &argp->lock, &resp->lock, &resp->cookie);
if (resp->status == nlm_drop_reply) if (resp->status == nlm_drop_reply)
return rpc_drop_reply; rc = rpc_drop_reply;
else
dprintk("lockd: TEST4 status %d\n", ntohl(resp->status));
dprintk("lockd: TEST4 status %d\n", ntohl(resp->status));
nlm_release_host(host); nlm_release_host(host);
nlm_release_file(file); nlm_release_file(file);
return rpc_success; return rc;
} }
static __be32 static __be32
@ -115,6 +117,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
{ {
struct nlm_host *host; struct nlm_host *host;
struct nlm_file *file; struct nlm_file *file;
int rc = rpc_success;
dprintk("lockd: LOCK called\n"); dprintk("lockd: LOCK called\n");
@ -123,7 +126,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
/* Don't accept new lock requests during grace period */ /* Don't accept new lock requests during grace period */
if (nlmsvc_grace_period && !argp->reclaim) { if (nlmsvc_grace_period && !argp->reclaim) {
resp->status = nlm_lck_denied_grace_period; resp->status = nlm_lck_denied_grace_period;
return rpc_success; return rc;
} }
/* Obtain client and file */ /* Obtain client and file */
@ -146,12 +149,13 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
resp->status = nlmsvc_lock(rqstp, file, &argp->lock, resp->status = nlmsvc_lock(rqstp, file, &argp->lock,
argp->block, &argp->cookie); argp->block, &argp->cookie);
if (resp->status == nlm_drop_reply) if (resp->status == nlm_drop_reply)
return rpc_drop_reply; rc = rpc_drop_reply;
else
dprintk("lockd: LOCK status %d\n", ntohl(resp->status));
dprintk("lockd: LOCK status %d\n", ntohl(resp->status));
nlm_release_host(host); nlm_release_host(host);
nlm_release_file(file); nlm_release_file(file);
return rpc_success; return rc;
} }
static __be32 static __be32

View File

@ -501,25 +501,29 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
block, block->b_flags, block->b_fl); block, block->b_flags, block->b_fl);
if (block->b_flags & B_TIMED_OUT) { if (block->b_flags & B_TIMED_OUT) {
nlmsvc_unlink_block(block); nlmsvc_unlink_block(block);
return nlm_lck_denied; ret = nlm_lck_denied;
goto out;
} }
if (block->b_flags & B_GOT_CALLBACK) { if (block->b_flags & B_GOT_CALLBACK) {
nlmsvc_unlink_block(block);
if (block->b_fl != NULL if (block->b_fl != NULL
&& block->b_fl->fl_type != F_UNLCK) { && block->b_fl->fl_type != F_UNLCK) {
lock->fl = *block->b_fl; lock->fl = *block->b_fl;
goto conf_lock; goto conf_lock;
} } else {
else { ret = nlm_granted;
nlmsvc_unlink_block(block); goto out;
return nlm_granted;
} }
} }
return nlm_drop_reply; ret = nlm_drop_reply;
goto out;
} }
error = vfs_test_lock(file->f_file, &lock->fl); error = vfs_test_lock(file->f_file, &lock->fl);
if (error == -EINPROGRESS) if (error == -EINPROGRESS) {
return nlmsvc_defer_lock_rqst(rqstp, block); ret = nlmsvc_defer_lock_rqst(rqstp, block);
goto out;
}
if (error) { if (error) {
ret = nlm_lck_denied_nolocks; ret = nlm_lck_denied_nolocks;
goto out; goto out;

View File

@ -113,6 +113,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
{ {
struct nlm_host *host; struct nlm_host *host;
struct nlm_file *file; struct nlm_file *file;
int rc = rpc_success;
dprintk("lockd: TEST called\n"); dprintk("lockd: TEST called\n");
resp->cookie = argp->cookie; resp->cookie = argp->cookie;
@ -120,7 +121,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
/* Don't accept test requests during grace period */ /* Don't accept test requests during grace period */
if (nlmsvc_grace_period) { if (nlmsvc_grace_period) {
resp->status = nlm_lck_denied_grace_period; resp->status = nlm_lck_denied_grace_period;
return rpc_success; return rc;
} }
/* Obtain client and file */ /* Obtain client and file */
@ -130,13 +131,14 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
/* Now check for conflicting locks */ /* Now check for conflicting locks */
resp->status = cast_status(nlmsvc_testlock(rqstp, file, &argp->lock, &resp->lock, &resp->cookie)); resp->status = cast_status(nlmsvc_testlock(rqstp, file, &argp->lock, &resp->lock, &resp->cookie));
if (resp->status == nlm_drop_reply) if (resp->status == nlm_drop_reply)
return rpc_drop_reply; rc = rpc_drop_reply;
else
dprintk("lockd: TEST status %d vers %d\n",
ntohl(resp->status), rqstp->rq_vers);
dprintk("lockd: TEST status %d vers %d\n",
ntohl(resp->status), rqstp->rq_vers);
nlm_release_host(host); nlm_release_host(host);
nlm_release_file(file); nlm_release_file(file);
return rpc_success; return rc;
} }
static __be32 static __be32
@ -145,6 +147,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
{ {
struct nlm_host *host; struct nlm_host *host;
struct nlm_file *file; struct nlm_file *file;
int rc = rpc_success;
dprintk("lockd: LOCK called\n"); dprintk("lockd: LOCK called\n");
@ -153,7 +156,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
/* Don't accept new lock requests during grace period */ /* Don't accept new lock requests during grace period */
if (nlmsvc_grace_period && !argp->reclaim) { if (nlmsvc_grace_period && !argp->reclaim) {
resp->status = nlm_lck_denied_grace_period; resp->status = nlm_lck_denied_grace_period;
return rpc_success; return rc;
} }
/* Obtain client and file */ /* Obtain client and file */
@ -176,12 +179,13 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
resp->status = cast_status(nlmsvc_lock(rqstp, file, &argp->lock, resp->status = cast_status(nlmsvc_lock(rqstp, file, &argp->lock,
argp->block, &argp->cookie)); argp->block, &argp->cookie));
if (resp->status == nlm_drop_reply) if (resp->status == nlm_drop_reply)
return rpc_drop_reply; rc = rpc_drop_reply;
else
dprintk("lockd: LOCK status %d\n", ntohl(resp->status));
dprintk("lockd: LOCK status %d\n", ntohl(resp->status));
nlm_release_host(host); nlm_release_host(host);
nlm_release_file(file); nlm_release_file(file);
return rpc_success; return rc;
} }
static __be32 static __be32

View File

@ -87,7 +87,7 @@ nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result,
unsigned int hash; unsigned int hash;
__be32 nfserr; __be32 nfserr;
nlm_debug_print_fh("nlm_file_lookup", f); nlm_debug_print_fh("nlm_lookup_file", f);
hash = file_hash(f); hash = file_hash(f);

View File

@ -119,8 +119,8 @@ int nfs_callback_up(void)
if (!serv) if (!serv)
goto out_err; goto out_err;
ret = svc_makesock(serv, IPPROTO_TCP, nfs_callback_set_tcpport, ret = svc_create_xprt(serv, "tcp", nfs_callback_set_tcpport,
SVC_SOCK_ANONYMOUS); SVC_SOCK_ANONYMOUS);
if (ret <= 0) if (ret <= 0)
goto out_destroy; goto out_destroy;
nfs_callback_tcpport = ret; nfs_callback_tcpport = ret;

View File

@ -1,6 +1,4 @@
/* /*
* include/linux/nfsd/auth.h
*
* nfsd-specific authentication stuff. * nfsd-specific authentication stuff.
* uid/gid mapping not yet implemented. * uid/gid mapping not yet implemented.
* *
@ -10,8 +8,6 @@
#ifndef LINUX_NFSD_AUTH_H #ifndef LINUX_NFSD_AUTH_H
#define LINUX_NFSD_AUTH_H #define LINUX_NFSD_AUTH_H
#ifdef __KERNEL__
#define nfsd_luid(rq, uid) ((u32)(uid)) #define nfsd_luid(rq, uid) ((u32)(uid))
#define nfsd_lgid(rq, gid) ((u32)(gid)) #define nfsd_lgid(rq, gid) ((u32)(gid))
#define nfsd_ruid(rq, uid) ((u32)(uid)) #define nfsd_ruid(rq, uid) ((u32)(uid))
@ -23,5 +19,4 @@
*/ */
int nfsd_setuser(struct svc_rqst *, struct svc_export *); int nfsd_setuser(struct svc_rqst *, struct svc_export *);
#endif /* __KERNEL__ */
#endif /* LINUX_NFSD_AUTH_H */ #endif /* LINUX_NFSD_AUTH_H */

View File

@ -1357,8 +1357,6 @@ exp_pseudoroot(struct svc_rqst *rqstp, struct svc_fh *fhp)
mk_fsid(FSID_NUM, fsidv, 0, 0, 0, NULL); mk_fsid(FSID_NUM, fsidv, 0, 0, 0, NULL);
exp = rqst_exp_find(rqstp, FSID_NUM, fsidv); exp = rqst_exp_find(rqstp, FSID_NUM, fsidv);
if (PTR_ERR(exp) == -ENOENT)
return nfserr_perm;
if (IS_ERR(exp)) if (IS_ERR(exp))
return nfserrno(PTR_ERR(exp)); return nfserrno(PTR_ERR(exp));
rv = fh_compose(fhp, exp, exp->ex_dentry, NULL); rv = fh_compose(fhp, exp, exp->ex_dentry, NULL);
@ -1637,13 +1635,19 @@ exp_verify_string(char *cp, int max)
/* /*
* Initialize the exports module. * Initialize the exports module.
*/ */
void int
nfsd_export_init(void) nfsd_export_init(void)
{ {
int rv;
dprintk("nfsd: initializing export module.\n"); dprintk("nfsd: initializing export module.\n");
cache_register(&svc_export_cache); rv = cache_register(&svc_export_cache);
cache_register(&svc_expkey_cache); if (rv)
return rv;
rv = cache_register(&svc_expkey_cache);
if (rv)
cache_unregister(&svc_export_cache);
return rv;
} }
@ -1670,10 +1674,8 @@ nfsd_export_shutdown(void)
exp_writelock(); exp_writelock();
if (cache_unregister(&svc_expkey_cache)) cache_unregister(&svc_expkey_cache);
printk(KERN_ERR "nfsd: failed to unregister expkey cache\n"); cache_unregister(&svc_export_cache);
if (cache_unregister(&svc_export_cache))
printk(KERN_ERR "nfsd: failed to unregister export cache\n");
svcauth_unix_purge(); svcauth_unix_purge();
exp_writeunlock(); exp_writeunlock();

View File

@ -221,12 +221,17 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
struct nfsd3_getaclres *resp) struct nfsd3_getaclres *resp)
{ {
struct dentry *dentry = resp->fh.fh_dentry; struct dentry *dentry = resp->fh.fh_dentry;
struct inode *inode = dentry->d_inode; struct inode *inode;
struct kvec *head = rqstp->rq_res.head; struct kvec *head = rqstp->rq_res.head;
unsigned int base; unsigned int base;
int n; int n;
int w; int w;
/*
* Since this is version 2, the check for nfserr in
* nfsd_dispatch actually ensures the following cannot happen.
* However, it seems fragile to depend on that.
*/
if (dentry == NULL || dentry->d_inode == NULL) if (dentry == NULL || dentry->d_inode == NULL)
return 0; return 0;
inode = dentry->d_inode; inode = dentry->d_inode;

View File

@ -21,6 +21,7 @@
#include <linux/sunrpc/svc.h> #include <linux/sunrpc/svc.h>
#include <linux/nfsd/nfsd.h> #include <linux/nfsd/nfsd.h>
#include <linux/nfsd/xdr3.h> #include <linux/nfsd/xdr3.h>
#include "auth.h"
#define NFSDDBG_FACILITY NFSDDBG_XDR #define NFSDDBG_FACILITY NFSDDBG_XDR
@ -88,10 +89,10 @@ encode_fh(__be32 *p, struct svc_fh *fhp)
* no slashes or null bytes. * no slashes or null bytes.
*/ */
static __be32 * static __be32 *
decode_filename(__be32 *p, char **namp, int *lenp) decode_filename(__be32 *p, char **namp, unsigned int *lenp)
{ {
char *name; char *name;
int i; unsigned int i;
if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS3_MAXNAMLEN)) != NULL) { if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS3_MAXNAMLEN)) != NULL) {
for (i = 0, name = *namp; i < *lenp; i++, name++) { for (i = 0, name = *namp; i < *lenp; i++, name++) {
@ -452,8 +453,7 @@ int
nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p, nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd3_symlinkargs *args) struct nfsd3_symlinkargs *args)
{ {
unsigned int len; unsigned int len, avail;
int avail;
char *old, *new; char *old, *new;
struct kvec *vec; struct kvec *vec;
@ -486,7 +486,8 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
/* now copy next page if there is one */ /* now copy next page if there is one */
if (len && !avail && rqstp->rq_arg.page_len) { if (len && !avail && rqstp->rq_arg.page_len) {
avail = rqstp->rq_arg.page_len; avail = rqstp->rq_arg.page_len;
if (avail > PAGE_SIZE) avail = PAGE_SIZE; if (avail > PAGE_SIZE)
avail = PAGE_SIZE;
old = page_address(rqstp->rq_arg.pages[0]); old = page_address(rqstp->rq_arg.pages[0]);
} }
while (len && avail && *old) { while (len && avail && *old) {
@ -816,11 +817,11 @@ static __be32 *
encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p,
struct svc_fh *fhp) struct svc_fh *fhp)
{ {
p = encode_post_op_attr(cd->rqstp, p, fhp); p = encode_post_op_attr(cd->rqstp, p, fhp);
*p++ = xdr_one; /* yes, a file handle follows */ *p++ = xdr_one; /* yes, a file handle follows */
p = encode_fh(p, fhp); p = encode_fh(p, fhp);
fh_put(fhp); fh_put(fhp);
return p; return p;
} }
static int static int

View File

@ -350,30 +350,6 @@ static struct rpc_version * nfs_cb_version[] = {
static int do_probe_callback(void *data) static int do_probe_callback(void *data)
{ {
struct nfs4_client *clp = data; struct nfs4_client *clp = data;
struct nfs4_callback *cb = &clp->cl_callback;
struct rpc_message msg = {
.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
.rpc_argp = clp,
};
int status;
status = rpc_call_sync(cb->cb_client, &msg, RPC_TASK_SOFT);
if (status) {
rpc_shutdown_client(cb->cb_client);
cb->cb_client = NULL;
} else
atomic_set(&cb->cb_set, 1);
put_nfs4_client(clp);
return 0;
}
/*
* Set up the callback client and put a NFSPROC4_CB_NULL on the wire...
*/
void
nfsd4_probe_callback(struct nfs4_client *clp)
{
struct sockaddr_in addr; struct sockaddr_in addr;
struct nfs4_callback *cb = &clp->cl_callback; struct nfs4_callback *cb = &clp->cl_callback;
struct rpc_timeout timeparms = { struct rpc_timeout timeparms = {
@ -390,13 +366,15 @@ nfsd4_probe_callback(struct nfs4_client *clp)
.timeout = &timeparms, .timeout = &timeparms,
.program = program, .program = program,
.version = nfs_cb_version[1]->number, .version = nfs_cb_version[1]->number,
.authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */ .authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */
.flags = (RPC_CLNT_CREATE_NOPING), .flags = (RPC_CLNT_CREATE_NOPING),
}; };
struct task_struct *t; struct rpc_message msg = {
.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
if (atomic_read(&cb->cb_set)) .rpc_argp = clp,
return; };
struct rpc_clnt *client;
int status;
/* Initialize address */ /* Initialize address */
memset(&addr, 0, sizeof(addr)); memset(&addr, 0, sizeof(addr));
@ -416,29 +394,50 @@ nfsd4_probe_callback(struct nfs4_client *clp)
program->stats->program = program; program->stats->program = program;
/* Create RPC client */ /* Create RPC client */
cb->cb_client = rpc_create(&args); client = rpc_create(&args);
if (IS_ERR(cb->cb_client)) { if (IS_ERR(client)) {
dprintk("NFSD: couldn't create callback client\n"); dprintk("NFSD: couldn't create callback client\n");
status = PTR_ERR(client);
goto out_err; goto out_err;
} }
status = rpc_call_sync(client, &msg, RPC_TASK_SOFT);
if (status)
goto out_release_client;
cb->cb_client = client;
atomic_set(&cb->cb_set, 1);
put_nfs4_client(clp);
return 0;
out_release_client:
rpc_shutdown_client(client);
out_err:
put_nfs4_client(clp);
dprintk("NFSD: warning: no callback path to client %.*s\n",
(int)clp->cl_name.len, clp->cl_name.data);
return status;
}
/*
* Set up the callback client and put a NFSPROC4_CB_NULL on the wire...
*/
void
nfsd4_probe_callback(struct nfs4_client *clp)
{
struct task_struct *t;
BUG_ON(atomic_read(&clp->cl_callback.cb_set));
/* the task holds a reference to the nfs4_client struct */ /* the task holds a reference to the nfs4_client struct */
atomic_inc(&clp->cl_count); atomic_inc(&clp->cl_count);
t = kthread_run(do_probe_callback, clp, "nfs4_cb_probe"); t = kthread_run(do_probe_callback, clp, "nfs4_cb_probe");
if (IS_ERR(t)) if (IS_ERR(t))
goto out_release_clp; atomic_dec(&clp->cl_count);
return; return;
out_release_clp:
atomic_dec(&clp->cl_count);
rpc_shutdown_client(cb->cb_client);
out_err:
cb->cb_client = NULL;
dprintk("NFSD: warning: no callback path to client %.*s\n",
(int)clp->cl_name.len, clp->cl_name.data);
} }
/* /*
@ -458,9 +457,6 @@ nfsd4_cb_recall(struct nfs4_delegation *dp)
int retries = 1; int retries = 1;
int status = 0; int status = 0;
if ((!atomic_read(&clp->cl_callback.cb_set)) || !clnt)
return;
cbr->cbr_trunc = 0; /* XXX need to implement truncate optimization */ cbr->cbr_trunc = 0; /* XXX need to implement truncate optimization */
cbr->cbr_dp = dp; cbr->cbr_dp = dp;
@ -469,6 +465,7 @@ nfsd4_cb_recall(struct nfs4_delegation *dp)
switch (status) { switch (status) {
case -EIO: case -EIO:
/* Network partition? */ /* Network partition? */
atomic_set(&clp->cl_callback.cb_set, 0);
case -EBADHANDLE: case -EBADHANDLE:
case -NFS4ERR_BAD_STATEID: case -NFS4ERR_BAD_STATEID:
/* Race: client probably got cb_recall /* Race: client probably got cb_recall
@ -481,11 +478,10 @@ nfsd4_cb_recall(struct nfs4_delegation *dp)
status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT); status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT);
} }
out_put_cred: out_put_cred:
if (status == -EIO) /*
atomic_set(&clp->cl_callback.cb_set, 0); * Success or failure, now we're either waiting for lease expiration
/* Success or failure, now we're either waiting for lease expiration * or deleg_return.
* or deleg_return. */ */
dprintk("NFSD: nfs4_cb_recall: dp %p dl_flock %p dl_count %d\n",dp, dp->dl_flock, atomic_read(&dp->dl_count));
put_nfs4_client(clp); put_nfs4_client(clp);
nfs4_put_delegation(dp); nfs4_put_delegation(dp);
return; return;

View File

@ -255,13 +255,10 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen)
goto out; goto out;
if (len == 0) if (len == 0)
set_bit(CACHE_NEGATIVE, &ent.h.flags); set_bit(CACHE_NEGATIVE, &ent.h.flags);
else { else if (len >= IDMAP_NAMESZ)
if (error >= IDMAP_NAMESZ) { goto out;
error = -EINVAL; else
goto out;
}
memcpy(ent.name, buf1, sizeof(ent.name)); memcpy(ent.name, buf1, sizeof(ent.name));
}
error = -ENOMEM; error = -ENOMEM;
res = idtoname_update(&ent, res); res = idtoname_update(&ent, res);
if (res == NULL) if (res == NULL)
@ -467,20 +464,25 @@ nametoid_update(struct ent *new, struct ent *old)
* Exported API * Exported API
*/ */
void int
nfsd_idmap_init(void) nfsd_idmap_init(void)
{ {
cache_register(&idtoname_cache); int rv;
cache_register(&nametoid_cache);
rv = cache_register(&idtoname_cache);
if (rv)
return rv;
rv = cache_register(&nametoid_cache);
if (rv)
cache_unregister(&idtoname_cache);
return rv;
} }
void void
nfsd_idmap_shutdown(void) nfsd_idmap_shutdown(void)
{ {
if (cache_unregister(&idtoname_cache)) cache_unregister(&idtoname_cache);
printk(KERN_ERR "nfsd: failed to unregister idtoname cache\n"); cache_unregister(&nametoid_cache);
if (cache_unregister(&nametoid_cache))
printk(KERN_ERR "nfsd: failed to unregister nametoid cache\n");
} }
/* /*

View File

@ -750,7 +750,7 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
cstate->current_fh.fh_export, cstate->current_fh.fh_export,
cstate->current_fh.fh_dentry, buf, cstate->current_fh.fh_dentry, buf,
&count, verify->ve_bmval, &count, verify->ve_bmval,
rqstp); rqstp, 0);
/* this means that nfsd4_encode_fattr() ran out of space */ /* this means that nfsd4_encode_fattr() ran out of space */
if (status == nfserr_resource && count == 0) if (status == nfserr_resource && count == 0)

View File

@ -61,7 +61,6 @@ static time_t lease_time = 90; /* default lease time */
static time_t user_lease_time = 90; static time_t user_lease_time = 90;
static time_t boot_time; static time_t boot_time;
static int in_grace = 1; static int in_grace = 1;
static u32 current_clientid = 1;
static u32 current_ownerid = 1; static u32 current_ownerid = 1;
static u32 current_fileid = 1; static u32 current_fileid = 1;
static u32 current_delegid = 1; static u32 current_delegid = 1;
@ -340,21 +339,20 @@ STALE_CLIENTID(clientid_t *clid)
* This type of memory management is somewhat inefficient, but we use it * This type of memory management is somewhat inefficient, but we use it
* anyway since SETCLIENTID is not a common operation. * anyway since SETCLIENTID is not a common operation.
*/ */
static inline struct nfs4_client * static struct nfs4_client *alloc_client(struct xdr_netobj name)
alloc_client(struct xdr_netobj name)
{ {
struct nfs4_client *clp; struct nfs4_client *clp;
if ((clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL))!= NULL) { clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL);
if ((clp->cl_name.data = kmalloc(name.len, GFP_KERNEL)) != NULL) { if (clp == NULL)
memcpy(clp->cl_name.data, name.data, name.len); return NULL;
clp->cl_name.len = name.len; clp->cl_name.data = kmalloc(name.len, GFP_KERNEL);
} if (clp->cl_name.data == NULL) {
else { kfree(clp);
kfree(clp); return NULL;
clp = NULL;
}
} }
memcpy(clp->cl_name.data, name.data, name.len);
clp->cl_name.len = name.len;
return clp; return clp;
} }
@ -363,8 +361,11 @@ shutdown_callback_client(struct nfs4_client *clp)
{ {
struct rpc_clnt *clnt = clp->cl_callback.cb_client; struct rpc_clnt *clnt = clp->cl_callback.cb_client;
/* shutdown rpc client, ending any outstanding recall rpcs */
if (clnt) { if (clnt) {
/*
* Callback threads take a reference on the client, so there
* should be no outstanding callbacks at this point.
*/
clp->cl_callback.cb_client = NULL; clp->cl_callback.cb_client = NULL;
rpc_shutdown_client(clnt); rpc_shutdown_client(clnt);
} }
@ -422,12 +423,13 @@ expire_client(struct nfs4_client *clp)
put_nfs4_client(clp); put_nfs4_client(clp);
} }
static struct nfs4_client * static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir)
create_client(struct xdr_netobj name, char *recdir) { {
struct nfs4_client *clp; struct nfs4_client *clp;
if (!(clp = alloc_client(name))) clp = alloc_client(name);
goto out; if (clp == NULL)
return NULL;
memcpy(clp->cl_recdir, recdir, HEXDIR_LEN); memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
atomic_set(&clp->cl_count, 1); atomic_set(&clp->cl_count, 1);
atomic_set(&clp->cl_callback.cb_set, 0); atomic_set(&clp->cl_callback.cb_set, 0);
@ -436,32 +438,30 @@ create_client(struct xdr_netobj name, char *recdir) {
INIT_LIST_HEAD(&clp->cl_openowners); INIT_LIST_HEAD(&clp->cl_openowners);
INIT_LIST_HEAD(&clp->cl_delegations); INIT_LIST_HEAD(&clp->cl_delegations);
INIT_LIST_HEAD(&clp->cl_lru); INIT_LIST_HEAD(&clp->cl_lru);
out:
return clp; return clp;
} }
static void static void copy_verf(struct nfs4_client *target, nfs4_verifier *source)
copy_verf(struct nfs4_client *target, nfs4_verifier *source) { {
memcpy(target->cl_verifier.data, source->data, sizeof(target->cl_verifier.data)); memcpy(target->cl_verifier.data, source->data,
sizeof(target->cl_verifier.data));
} }
static void static void copy_clid(struct nfs4_client *target, struct nfs4_client *source)
copy_clid(struct nfs4_client *target, struct nfs4_client *source) { {
target->cl_clientid.cl_boot = source->cl_clientid.cl_boot; target->cl_clientid.cl_boot = source->cl_clientid.cl_boot;
target->cl_clientid.cl_id = source->cl_clientid.cl_id; target->cl_clientid.cl_id = source->cl_clientid.cl_id;
} }
static void static void copy_cred(struct svc_cred *target, struct svc_cred *source)
copy_cred(struct svc_cred *target, struct svc_cred *source) { {
target->cr_uid = source->cr_uid; target->cr_uid = source->cr_uid;
target->cr_gid = source->cr_gid; target->cr_gid = source->cr_gid;
target->cr_group_info = source->cr_group_info; target->cr_group_info = source->cr_group_info;
get_group_info(target->cr_group_info); get_group_info(target->cr_group_info);
} }
static inline int static int same_name(const char *n1, const char *n2)
same_name(const char *n1, const char *n2)
{ {
return 0 == memcmp(n1, n2, HEXDIR_LEN); return 0 == memcmp(n1, n2, HEXDIR_LEN);
} }
@ -485,26 +485,26 @@ same_creds(struct svc_cred *cr1, struct svc_cred *cr2)
return cr1->cr_uid == cr2->cr_uid; return cr1->cr_uid == cr2->cr_uid;
} }
static void static void gen_clid(struct nfs4_client *clp)
gen_clid(struct nfs4_client *clp) { {
static u32 current_clientid = 1;
clp->cl_clientid.cl_boot = boot_time; clp->cl_clientid.cl_boot = boot_time;
clp->cl_clientid.cl_id = current_clientid++; clp->cl_clientid.cl_id = current_clientid++;
} }
static void static void gen_confirm(struct nfs4_client *clp)
gen_confirm(struct nfs4_client *clp) { {
struct timespec tv; static u32 i;
u32 * p; u32 *p;
tv = CURRENT_TIME;
p = (u32 *)clp->cl_confirm.data; p = (u32 *)clp->cl_confirm.data;
*p++ = tv.tv_sec; *p++ = get_seconds();
*p++ = tv.tv_nsec; *p++ = i++;
} }
static int static int check_name(struct xdr_netobj name)
check_name(struct xdr_netobj name) { {
if (name.len == 0) if (name.len == 0)
return 0; return 0;
if (name.len > NFS4_OPAQUE_LIMIT) { if (name.len > NFS4_OPAQUE_LIMIT) {
@ -683,39 +683,6 @@ out_err:
return; return;
} }
/*
* RFC 3010 has a complex implmentation description of processing a
* SETCLIENTID request consisting of 5 bullets, labeled as
* CASE0 - CASE4 below.
*
* NOTES:
* callback information will be processed in a future patch
*
* an unconfirmed record is added when:
* NORMAL (part of CASE 4): there is no confirmed nor unconfirmed record.
* CASE 1: confirmed record found with matching name, principal,
* verifier, and clientid.
* CASE 2: confirmed record found with matching name, principal,
* and there is no unconfirmed record with matching
* name and principal
*
* an unconfirmed record is replaced when:
* CASE 3: confirmed record found with matching name, principal,
* and an unconfirmed record is found with matching
* name, principal, and with clientid and
* confirm that does not match the confirmed record.
* CASE 4: there is no confirmed record with matching name and
* principal. there is an unconfirmed record with
* matching name, principal.
*
* an unconfirmed record is deleted when:
* CASE 1: an unconfirmed record that matches input name, verifier,
* and confirmed clientid.
* CASE 4: any unconfirmed records with matching name and principal
* that exist after an unconfirmed record has been replaced
* as described above.
*
*/
__be32 __be32
nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_setclientid *setclid) struct nfsd4_setclientid *setclid)
@ -748,11 +715,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
nfs4_lock_state(); nfs4_lock_state();
conf = find_confirmed_client_by_str(dname, strhashval); conf = find_confirmed_client_by_str(dname, strhashval);
if (conf) { if (conf) {
/* /* RFC 3530 14.2.33 CASE 0: */
* CASE 0:
* clname match, confirmed, different principal
* or different ip_address
*/
status = nfserr_clid_inuse; status = nfserr_clid_inuse;
if (!same_creds(&conf->cl_cred, &rqstp->rq_cred) if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)
|| conf->cl_addr != sin->sin_addr.s_addr) { || conf->cl_addr != sin->sin_addr.s_addr) {
@ -761,12 +724,17 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out; goto out;
} }
} }
/*
* section 14.2.33 of RFC 3530 (under the heading "IMPLEMENTATION")
* has a description of SETCLIENTID request processing consisting
* of 5 bullet points, labeled as CASE0 - CASE4 below.
*/
unconf = find_unconfirmed_client_by_str(dname, strhashval); unconf = find_unconfirmed_client_by_str(dname, strhashval);
status = nfserr_resource; status = nfserr_resource;
if (!conf) { if (!conf) {
/* /*
* CASE 4: * RFC 3530 14.2.33 CASE 4:
* placed first, because it is the normal case. * placed first, because it is the normal case
*/ */
if (unconf) if (unconf)
expire_client(unconf); expire_client(unconf);
@ -776,17 +744,8 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
gen_clid(new); gen_clid(new);
} else if (same_verf(&conf->cl_verifier, &clverifier)) { } else if (same_verf(&conf->cl_verifier, &clverifier)) {
/* /*
* CASE 1: * RFC 3530 14.2.33 CASE 1:
* cl_name match, confirmed, principal match * probable callback update
* verifier match: probable callback update
*
* remove any unconfirmed nfs4_client with
* matching cl_name, cl_verifier, and cl_clientid
*
* create and insert an unconfirmed nfs4_client with same
* cl_name, cl_verifier, and cl_clientid as existing
* nfs4_client, but with the new callback info and a
* new cl_confirm
*/ */
if (unconf) { if (unconf) {
/* Note this is removing unconfirmed {*x***}, /* Note this is removing unconfirmed {*x***},
@ -802,43 +761,25 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
copy_clid(new, conf); copy_clid(new, conf);
} else if (!unconf) { } else if (!unconf) {
/* /*
* CASE 2: * RFC 3530 14.2.33 CASE 2:
* clname match, confirmed, principal match * probable client reboot; state will be removed if
* verfier does not match * confirmed.
* no unconfirmed. create a new unconfirmed nfs4_client
* using input clverifier, clname, and callback info
* and generate a new cl_clientid and cl_confirm.
*/ */
new = create_client(clname, dname); new = create_client(clname, dname);
if (new == NULL) if (new == NULL)
goto out; goto out;
gen_clid(new); gen_clid(new);
} else if (!same_verf(&conf->cl_confirm, &unconf->cl_confirm)) { } else {
/* /*
* CASE3: * RFC 3530 14.2.33 CASE 3:
* confirmed found (name, principal match) * probable client reboot; state will be removed if
* confirmed verifier does not match input clverifier * confirmed.
*
* unconfirmed found (name match)
* confirmed->cl_confirm != unconfirmed->cl_confirm
*
* remove unconfirmed.
*
* create an unconfirmed nfs4_client
* with same cl_name as existing confirmed nfs4_client,
* but with new callback info, new cl_clientid,
* new cl_verifier and a new cl_confirm
*/ */
expire_client(unconf); expire_client(unconf);
new = create_client(clname, dname); new = create_client(clname, dname);
if (new == NULL) if (new == NULL)
goto out; goto out;
gen_clid(new); gen_clid(new);
} else {
/* No cases hit !!! */
status = nfserr_inval;
goto out;
} }
copy_verf(new, &clverifier); copy_verf(new, &clverifier);
new->cl_addr = sin->sin_addr.s_addr; new->cl_addr = sin->sin_addr.s_addr;
@ -857,11 +798,9 @@ out:
/* /*
* RFC 3010 has a complex implmentation description of processing a * Section 14.2.34 of RFC 3530 (under the heading "IMPLEMENTATION") has
* SETCLIENTID_CONFIRM request consisting of 4 bullets describing * a description of SETCLIENTID_CONFIRM request processing consisting of 4
* processing on a DRC miss, labeled as CASE1 - CASE4 below. * bullets, labeled as CASE1 - CASE4 below.
*
* NOTE: callback information will be processed here in a future patch
*/ */
__be32 __be32
nfsd4_setclientid_confirm(struct svc_rqst *rqstp, nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
@ -892,16 +831,16 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
if (unconf && unconf->cl_addr != sin->sin_addr.s_addr) if (unconf && unconf->cl_addr != sin->sin_addr.s_addr)
goto out; goto out;
if ((conf && unconf) && /*
(same_verf(&unconf->cl_confirm, &confirm)) && * section 14.2.34 of RFC 3530 has a description of
(same_verf(&conf->cl_verifier, &unconf->cl_verifier)) && * SETCLIENTID_CONFIRM request processing consisting
(same_name(conf->cl_recdir,unconf->cl_recdir)) && * of 4 bullet points, labeled as CASE1 - CASE4 below.
(!same_verf(&conf->cl_confirm, &unconf->cl_confirm))) { */
/* CASE 1: if (conf && unconf && same_verf(&confirm, &unconf->cl_confirm)) {
* unconf record that matches input clientid and input confirm. /*
* conf record that matches input clientid. * RFC 3530 14.2.34 CASE 1:
* conf and unconf records match names, verifiers * callback update
*/ */
if (!same_creds(&conf->cl_cred, &unconf->cl_cred)) if (!same_creds(&conf->cl_cred, &unconf->cl_cred))
status = nfserr_clid_inuse; status = nfserr_clid_inuse;
else { else {
@ -914,15 +853,11 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
status = nfs_ok; status = nfs_ok;
} }
} else if ((conf && !unconf) || } else if (conf && !unconf) {
((conf && unconf) && /*
(!same_verf(&conf->cl_verifier, &unconf->cl_verifier) || * RFC 3530 14.2.34 CASE 2:
!same_name(conf->cl_recdir, unconf->cl_recdir)))) { * probable retransmitted request; play it safe and
/* CASE 2: * do nothing.
* conf record that matches input clientid.
* if unconf record matches input clientid, then
* unconf->cl_name or unconf->cl_verifier don't match the
* conf record.
*/ */
if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) if (!same_creds(&conf->cl_cred, &rqstp->rq_cred))
status = nfserr_clid_inuse; status = nfserr_clid_inuse;
@ -930,10 +865,9 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
status = nfs_ok; status = nfs_ok;
} else if (!conf && unconf } else if (!conf && unconf
&& same_verf(&unconf->cl_confirm, &confirm)) { && same_verf(&unconf->cl_confirm, &confirm)) {
/* CASE 3: /*
* conf record not found. * RFC 3530 14.2.34 CASE 3:
* unconf record found. * Normal case; new or rebooted client:
* unconf->cl_confirm matches input confirm
*/ */
if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred)) { if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred)) {
status = nfserr_clid_inuse; status = nfserr_clid_inuse;
@ -948,16 +882,15 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
} }
move_to_confirmed(unconf); move_to_confirmed(unconf);
conf = unconf; conf = unconf;
nfsd4_probe_callback(conf);
status = nfs_ok; status = nfs_ok;
} }
} else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm))) } else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm)))
&& (!unconf || (unconf && !same_verf(&unconf->cl_confirm, && (!unconf || (unconf && !same_verf(&unconf->cl_confirm,
&confirm)))) { &confirm)))) {
/* CASE 4: /*
* conf record not found, or if conf, conf->cl_confirm does not * RFC 3530 14.2.34 CASE 4:
* match input confirm. * Client probably hasn't noticed that we rebooted yet.
* unconf record not found, or if unconf, unconf->cl_confirm
* does not match input confirm.
*/ */
status = nfserr_stale_clientid; status = nfserr_stale_clientid;
} else { } else {
@ -965,8 +898,6 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
status = nfserr_clid_inuse; status = nfserr_clid_inuse;
} }
out: out:
if (!status)
nfsd4_probe_callback(conf);
nfs4_unlock_state(); nfs4_unlock_state();
return status; return status;
} }
@ -1226,14 +1157,19 @@ find_file(struct inode *ino)
return NULL; return NULL;
} }
static int access_valid(u32 x) static inline int access_valid(u32 x)
{ {
return (x > 0 && x < 4); if (x < NFS4_SHARE_ACCESS_READ)
return 0;
if (x > NFS4_SHARE_ACCESS_BOTH)
return 0;
return 1;
} }
static int deny_valid(u32 x) static inline int deny_valid(u32 x)
{ {
return (x >= 0 && x < 5); /* Note: unlike access bits, deny bits may be zero. */
return x <= NFS4_SHARE_DENY_BOTH;
} }
static void static void
@ -2162,8 +2098,10 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei
goto check_replay; goto check_replay;
} }
*stpp = stp;
*sopp = sop = stp->st_stateowner;
if (lock) { if (lock) {
struct nfs4_stateowner *sop = stp->st_stateowner;
clientid_t *lockclid = &lock->v.new.clientid; clientid_t *lockclid = &lock->v.new.clientid;
struct nfs4_client *clp = sop->so_client; struct nfs4_client *clp = sop->so_client;
int lkflg = 0; int lkflg = 0;
@ -2193,9 +2131,6 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei
return nfserr_bad_stateid; return nfserr_bad_stateid;
} }
*stpp = stp;
*sopp = sop = stp->st_stateowner;
/* /*
* We now validate the seqid and stateid generation numbers. * We now validate the seqid and stateid generation numbers.
* For the moment, we ignore the possibility of * For the moment, we ignore the possibility of

View File

@ -148,12 +148,12 @@ xdr_error: \
} \ } \
} while (0) } while (0)
static __be32 *read_buf(struct nfsd4_compoundargs *argp, int nbytes) static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes)
{ {
/* We want more bytes than seem to be available. /* We want more bytes than seem to be available.
* Maybe we need a new page, maybe we have just run out * Maybe we need a new page, maybe we have just run out
*/ */
int avail = (char*)argp->end - (char*)argp->p; unsigned int avail = (char *)argp->end - (char *)argp->p;
__be32 *p; __be32 *p;
if (avail + argp->pagelen < nbytes) if (avail + argp->pagelen < nbytes)
return NULL; return NULL;
@ -169,6 +169,11 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, int nbytes)
return NULL; return NULL;
} }
/*
* The following memcpy is safe because read_buf is always
* called with nbytes > avail, and the two cases above both
* guarantee p points to at least nbytes bytes.
*/
memcpy(p, argp->p, avail); memcpy(p, argp->p, avail);
/* step to next page */ /* step to next page */
argp->p = page_address(argp->pagelist[0]); argp->p = page_address(argp->pagelist[0]);
@ -1448,7 +1453,7 @@ static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err)
__be32 __be32
nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
struct dentry *dentry, __be32 *buffer, int *countp, u32 *bmval, struct dentry *dentry, __be32 *buffer, int *countp, u32 *bmval,
struct svc_rqst *rqstp) struct svc_rqst *rqstp, int ignore_crossmnt)
{ {
u32 bmval0 = bmval[0]; u32 bmval0 = bmval[0];
u32 bmval1 = bmval[1]; u32 bmval1 = bmval[1];
@ -1828,7 +1833,12 @@ out_acl:
if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) {
if ((buflen -= 8) < 0) if ((buflen -= 8) < 0)
goto out_resource; goto out_resource;
if (exp->ex_mnt->mnt_root->d_inode == dentry->d_inode) { /*
* Get parent's attributes if not ignoring crossmount
* and this is the root of a cross-mounted filesystem.
*/
if (ignore_crossmnt == 0 &&
exp->ex_mnt->mnt_root->d_inode == dentry->d_inode) {
err = vfs_getattr(exp->ex_mnt->mnt_parent, err = vfs_getattr(exp->ex_mnt->mnt_parent,
exp->ex_mnt->mnt_mountpoint, &stat); exp->ex_mnt->mnt_mountpoint, &stat);
if (err) if (err)
@ -1864,13 +1874,25 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
struct svc_export *exp = cd->rd_fhp->fh_export; struct svc_export *exp = cd->rd_fhp->fh_export;
struct dentry *dentry; struct dentry *dentry;
__be32 nfserr; __be32 nfserr;
int ignore_crossmnt = 0;
dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen); dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen);
if (IS_ERR(dentry)) if (IS_ERR(dentry))
return nfserrno(PTR_ERR(dentry)); return nfserrno(PTR_ERR(dentry));
exp_get(exp); exp_get(exp);
if (d_mountpoint(dentry)) { /*
* In the case of a mountpoint, the client may be asking for
* attributes that are only properties of the underlying filesystem
* as opposed to the cross-mounted file system. In such a case,
* we will not follow the cross mount and will fill the attributes
* directly from the mountpoint dentry.
*/
if (d_mountpoint(dentry) &&
(cd->rd_bmval[0] & ~FATTR4_WORD0_RDATTR_ERROR) == 0 &&
(cd->rd_bmval[1] & ~FATTR4_WORD1_MOUNTED_ON_FILEID) == 0)
ignore_crossmnt = 1;
else if (d_mountpoint(dentry)) {
int err; int err;
/* /*
@ -1889,7 +1911,7 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
} }
nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen, cd->rd_bmval, nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen, cd->rd_bmval,
cd->rd_rqstp); cd->rd_rqstp, ignore_crossmnt);
out_put: out_put:
dput(dentry); dput(dentry);
exp_put(exp); exp_put(exp);
@ -2043,7 +2065,7 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
buflen = resp->end - resp->p - (COMPOUND_ERR_SLACK_SPACE >> 2); buflen = resp->end - resp->p - (COMPOUND_ERR_SLACK_SPACE >> 2);
nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry, nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry,
resp->p, &buflen, getattr->ga_bmval, resp->p, &buflen, getattr->ga_bmval,
resp->rqstp); resp->rqstp, 0);
if (!nfserr) if (!nfserr)
resp->p += buflen; resp->p += buflen;
return nfserr; return nfserr;

View File

@ -44,17 +44,17 @@ static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec);
*/ */
static DEFINE_SPINLOCK(cache_lock); static DEFINE_SPINLOCK(cache_lock);
void int nfsd_reply_cache_init(void)
nfsd_cache_init(void)
{ {
struct svc_cacherep *rp; struct svc_cacherep *rp;
int i; int i;
INIT_LIST_HEAD(&lru_head); INIT_LIST_HEAD(&lru_head);
i = CACHESIZE; i = CACHESIZE;
while(i) { while (i) {
rp = kmalloc(sizeof(*rp), GFP_KERNEL); rp = kmalloc(sizeof(*rp), GFP_KERNEL);
if (!rp) break; if (!rp)
goto out_nomem;
list_add(&rp->c_lru, &lru_head); list_add(&rp->c_lru, &lru_head);
rp->c_state = RC_UNUSED; rp->c_state = RC_UNUSED;
rp->c_type = RC_NOCACHE; rp->c_type = RC_NOCACHE;
@ -62,23 +62,19 @@ nfsd_cache_init(void)
i--; i--;
} }
if (i)
printk (KERN_ERR "nfsd: cannot allocate all %d cache entries, only got %d\n",
CACHESIZE, CACHESIZE-i);
hash_list = kcalloc (HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL); hash_list = kcalloc (HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL);
if (!hash_list) { if (!hash_list)
nfsd_cache_shutdown(); goto out_nomem;
printk (KERN_ERR "nfsd: cannot allocate %Zd bytes for hash list\n",
HASHSIZE * sizeof(struct hlist_head));
return;
}
cache_disabled = 0; cache_disabled = 0;
return 0;
out_nomem:
printk(KERN_ERR "nfsd: failed to allocate reply cache\n");
nfsd_reply_cache_shutdown();
return -ENOMEM;
} }
void void nfsd_reply_cache_shutdown(void)
nfsd_cache_shutdown(void)
{ {
struct svc_cacherep *rp; struct svc_cacherep *rp;

View File

@ -304,6 +304,9 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size)
struct auth_domain *dom; struct auth_domain *dom;
struct knfsd_fh fh; struct knfsd_fh fh;
if (size == 0)
return -EINVAL;
if (buf[size-1] != '\n') if (buf[size-1] != '\n')
return -EINVAL; return -EINVAL;
buf[size-1] = 0; buf[size-1] = 0;
@ -503,7 +506,7 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
int len = 0; int len = 0;
lock_kernel(); lock_kernel();
if (nfsd_serv) if (nfsd_serv)
len = svc_sock_names(buf, nfsd_serv, NULL); len = svc_xprt_names(nfsd_serv, buf, 0);
unlock_kernel(); unlock_kernel();
return len; return len;
} }
@ -540,7 +543,7 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
} }
return err < 0 ? err : 0; return err < 0 ? err : 0;
} }
if (buf[0] == '-') { if (buf[0] == '-' && isdigit(buf[1])) {
char *toclose = kstrdup(buf+1, GFP_KERNEL); char *toclose = kstrdup(buf+1, GFP_KERNEL);
int len = 0; int len = 0;
if (!toclose) if (!toclose)
@ -554,6 +557,53 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
kfree(toclose); kfree(toclose);
return len; return len;
} }
/*
* Add a transport listener by writing its transport name
*/
if (isalpha(buf[0])) {
int err;
char transport[16];
int port;
if (sscanf(buf, "%15s %4d", transport, &port) == 2) {
err = nfsd_create_serv();
if (!err) {
err = svc_create_xprt(nfsd_serv,
transport, port,
SVC_SOCK_ANONYMOUS);
if (err == -ENOENT)
/* Give a reasonable perror msg for
* bad transport string */
err = -EPROTONOSUPPORT;
}
return err < 0 ? err : 0;
}
}
/*
* Remove a transport by writing its transport name and port number
*/
if (buf[0] == '-' && isalpha(buf[1])) {
struct svc_xprt *xprt;
int err = -EINVAL;
char transport[16];
int port;
if (sscanf(&buf[1], "%15s %4d", transport, &port) == 2) {
if (port == 0)
return -EINVAL;
lock_kernel();
if (nfsd_serv) {
xprt = svc_find_xprt(nfsd_serv, transport,
AF_UNSPEC, port);
if (xprt) {
svc_close_xprt(xprt);
svc_xprt_put(xprt);
err = 0;
} else
err = -ENOTCONN;
}
unlock_kernel();
return err < 0 ? err : 0;
}
}
return -EINVAL; return -EINVAL;
} }
@ -616,7 +666,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
char *recdir; char *recdir;
int len, status; int len, status;
if (size > PATH_MAX || buf[size-1] != '\n') if (size == 0 || size > PATH_MAX || buf[size-1] != '\n')
return -EINVAL; return -EINVAL;
buf[size-1] = 0; buf[size-1] = 0;
@ -674,6 +724,27 @@ static struct file_system_type nfsd_fs_type = {
.kill_sb = kill_litter_super, .kill_sb = kill_litter_super,
}; };
#ifdef CONFIG_PROC_FS
/*
 * Create the "fs/nfs" proc directory and the "fs/nfs/exports" entry
 * beneath it, and attach exports_operations as the entry's file
 * operations.
 *
 * Returns 0 on success, or -ENOMEM if either proc entry could not be
 * created.  On failure no proc entries are left behind, so the caller
 * does not have to undo a partially completed setup.
 */
static int create_proc_exports_entry(void)
{
	struct proc_dir_entry *entry;

	entry = proc_mkdir("fs/nfs", NULL);
	if (!entry)
		return -ENOMEM;
	entry = create_proc_entry("fs/nfs/exports", 0, NULL);
	if (!entry) {
		/*
		 * The directory was created above; remove it again so a
		 * failed setup does not leak an empty "fs/nfs" directory.
		 */
		remove_proc_entry("fs/nfs", NULL);
		return -ENOMEM;
	}
	entry->proc_fops = &exports_operations;
	return 0;
}
#else /* CONFIG_PROC_FS */
/*
 * Variant used when CONFIG_PROC_FS is not defined: there is no proc
 * entry to create, so unconditionally report success (0).
 */
static int create_proc_exports_entry(void)
{
return 0;
}
#endif
static int __init init_nfsd(void) static int __init init_nfsd(void)
{ {
int retval; int retval;
@ -683,32 +754,43 @@ static int __init init_nfsd(void)
if (retval) if (retval)
return retval; return retval;
nfsd_stat_init(); /* Statistics */ nfsd_stat_init(); /* Statistics */
nfsd_cache_init(); /* RPC reply cache */ retval = nfsd_reply_cache_init();
nfsd_export_init(); /* Exports table */ if (retval)
goto out_free_stat;
retval = nfsd_export_init();
if (retval)
goto out_free_cache;
nfsd_lockd_init(); /* lockd->nfsd callbacks */ nfsd_lockd_init(); /* lockd->nfsd callbacks */
nfsd_idmap_init(); /* Name to ID mapping */ retval = nfsd_idmap_init();
if (proc_mkdir("fs/nfs", NULL)) { if (retval)
struct proc_dir_entry *entry; goto out_free_lockd;
entry = create_proc_entry("fs/nfs/exports", 0, NULL); retval = create_proc_exports_entry();
if (entry) if (retval)
entry->proc_fops = &exports_operations; goto out_free_idmap;
}
retval = register_filesystem(&nfsd_fs_type); retval = register_filesystem(&nfsd_fs_type);
if (retval) { if (retval)
nfsd_export_shutdown(); goto out_free_all;
nfsd_cache_shutdown(); return 0;
remove_proc_entry("fs/nfs/exports", NULL); out_free_all:
remove_proc_entry("fs/nfs", NULL); remove_proc_entry("fs/nfs/exports", NULL);
nfsd_stat_shutdown(); remove_proc_entry("fs/nfs", NULL);
nfsd_lockd_shutdown(); out_free_idmap:
} nfsd_idmap_shutdown();
out_free_lockd:
nfsd_lockd_shutdown();
nfsd_export_shutdown();
out_free_cache:
nfsd_reply_cache_shutdown();
out_free_stat:
nfsd_stat_shutdown();
nfsd4_free_slabs();
return retval; return retval;
} }
static void __exit exit_nfsd(void) static void __exit exit_nfsd(void)
{ {
nfsd_export_shutdown(); nfsd_export_shutdown();
nfsd_cache_shutdown(); nfsd_reply_cache_shutdown();
remove_proc_entry("fs/nfs/exports", NULL); remove_proc_entry("fs/nfs/exports", NULL);
remove_proc_entry("fs/nfs", NULL); remove_proc_entry("fs/nfs", NULL);
nfsd_stat_shutdown(); nfsd_stat_shutdown();

View File

@ -22,6 +22,7 @@
#include <linux/sunrpc/svc.h> #include <linux/sunrpc/svc.h>
#include <linux/sunrpc/svcauth_gss.h> #include <linux/sunrpc/svcauth_gss.h>
#include <linux/nfsd/nfsd.h> #include <linux/nfsd/nfsd.h>
#include "auth.h"
#define NFSDDBG_FACILITY NFSDDBG_FH #define NFSDDBG_FACILITY NFSDDBG_FH

View File

@ -155,8 +155,8 @@ static int killsig; /* signal that was used to kill last nfsd */
static void nfsd_last_thread(struct svc_serv *serv) static void nfsd_last_thread(struct svc_serv *serv)
{ {
/* When last nfsd thread exits we need to do some clean-up */ /* When last nfsd thread exits we need to do some clean-up */
struct svc_sock *svsk; struct svc_xprt *xprt;
list_for_each_entry(svsk, &serv->sv_permsocks, sk_list) list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list)
lockd_down(); lockd_down();
nfsd_serv = NULL; nfsd_serv = NULL;
nfsd_racache_shutdown(); nfsd_racache_shutdown();
@ -236,7 +236,7 @@ static int nfsd_init_socks(int port)
error = lockd_up(IPPROTO_UDP); error = lockd_up(IPPROTO_UDP);
if (error >= 0) { if (error >= 0) {
error = svc_makesock(nfsd_serv, IPPROTO_UDP, port, error = svc_create_xprt(nfsd_serv, "udp", port,
SVC_SOCK_DEFAULTS); SVC_SOCK_DEFAULTS);
if (error < 0) if (error < 0)
lockd_down(); lockd_down();
@ -247,7 +247,7 @@ static int nfsd_init_socks(int port)
#ifdef CONFIG_NFSD_TCP #ifdef CONFIG_NFSD_TCP
error = lockd_up(IPPROTO_TCP); error = lockd_up(IPPROTO_TCP);
if (error >= 0) { if (error >= 0) {
error = svc_makesock(nfsd_serv, IPPROTO_TCP, port, error = svc_create_xprt(nfsd_serv, "tcp", port,
SVC_SOCK_DEFAULTS); SVC_SOCK_DEFAULTS);
if (error < 0) if (error < 0)
lockd_down(); lockd_down();

View File

@ -15,6 +15,7 @@
#include <linux/nfsd/nfsd.h> #include <linux/nfsd/nfsd.h>
#include <linux/nfsd/xdr.h> #include <linux/nfsd/xdr.h>
#include <linux/mm.h> #include <linux/mm.h>
#include "auth.h"
#define NFSDDBG_FACILITY NFSDDBG_XDR #define NFSDDBG_FACILITY NFSDDBG_XDR
@ -62,10 +63,10 @@ encode_fh(__be32 *p, struct svc_fh *fhp)
* no slashes or null bytes. * no slashes or null bytes.
*/ */
static __be32 * static __be32 *
decode_filename(__be32 *p, char **namp, int *lenp) decode_filename(__be32 *p, char **namp, unsigned int *lenp)
{ {
char *name; char *name;
int i; unsigned int i;
if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS_MAXNAMLEN)) != NULL) { if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS_MAXNAMLEN)) != NULL) {
for (i = 0, name = *namp; i < *lenp; i++, name++) { for (i = 0, name = *namp; i < *lenp; i++, name++) {
@ -78,10 +79,10 @@ decode_filename(__be32 *p, char **namp, int *lenp)
} }
static __be32 * static __be32 *
decode_pathname(__be32 *p, char **namp, int *lenp) decode_pathname(__be32 *p, char **namp, unsigned int *lenp)
{ {
char *name; char *name;
int i; unsigned int i;
if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS_MAXPATHLEN)) != NULL) { if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS_MAXPATHLEN)) != NULL) {
for (i = 0, name = *namp; i < *lenp; i++, name++) { for (i = 0, name = *namp; i < *lenp; i++, name++) {

View File

@ -132,7 +132,7 @@ out:
__be32 __be32
nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
const char *name, int len, const char *name, unsigned int len,
struct svc_export **exp_ret, struct dentry **dentry_ret) struct svc_export **exp_ret, struct dentry **dentry_ret)
{ {
struct svc_export *exp; struct svc_export *exp;
@ -226,7 +226,7 @@ out_nfserr:
*/ */
__be32 __be32
nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
int len, struct svc_fh *resfh) unsigned int len, struct svc_fh *resfh)
{ {
struct svc_export *exp; struct svc_export *exp;
struct dentry *dentry; struct dentry *dentry;
@ -1151,6 +1151,26 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
} }
#endif /* CONFIG_NFSD_V3 */ #endif /* CONFIG_NFSD_V3 */
/*
 * Apply the attributes in @iap that still need setting on a newly
 * created object.
 *
 * ATTR_MODE is always cleared from iap->ia_valid, because the mode has
 * already been set earlier in create.  ATTR_UID and ATTR_GID are cleared
 * as well unless current->fsuid is 0: setting uid/gid works only for
 * root, and Irix appears to send along the gid on create when it tries
 * to implement setgid directories via NFS.
 *
 * Returns 0 when no attribute bits remain, otherwise the result of
 * nfsd_setattr() for the remaining bits.
 */
__be32
nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp,
		struct iattr *iap)
{
	unsigned int ignored = ATTR_MODE;

	/* Non-root callers additionally lose the ownership bits. */
	if (current->fsuid != 0)
		ignored |= ATTR_UID | ATTR_GID;

	iap->ia_valid &= ~ignored;
	if (!iap->ia_valid)
		return 0;

	return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
}
/* /*
* Create a file (regular, directory, device, fifo); UNIX sockets * Create a file (regular, directory, device, fifo); UNIX sockets
* not yet implemented. * not yet implemented.
@ -1167,6 +1187,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct dentry *dentry, *dchild = NULL; struct dentry *dentry, *dchild = NULL;
struct inode *dirp; struct inode *dirp;
__be32 err; __be32 err;
__be32 err2;
int host_err; int host_err;
err = nfserr_perm; err = nfserr_perm;
@ -1257,16 +1278,9 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
} }
/* Set file attributes. Mode has already been set and err2 = nfsd_create_setattr(rqstp, resfhp, iap);
* setting uid/gid works only for root. Irix appears to if (err2)
* send along the gid when it tries to implement setgid err = err2;
* directories via NFS.
*/
if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID|ATTR_MODE)) != 0) {
__be32 err2 = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
if (err2)
err = err2;
}
/* /*
* Update the file handle to get the new inode info. * Update the file handle to get the new inode info.
*/ */
@ -1295,6 +1309,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct dentry *dentry, *dchild = NULL; struct dentry *dentry, *dchild = NULL;
struct inode *dirp; struct inode *dirp;
__be32 err; __be32 err;
__be32 err2;
int host_err; int host_err;
__u32 v_mtime=0, v_atime=0; __u32 v_mtime=0, v_atime=0;
@ -1399,16 +1414,10 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
iap->ia_atime.tv_nsec = 0; iap->ia_atime.tv_nsec = 0;
} }
/* Set file attributes.
* Irix appears to send along the gid when it tries to
* implement setgid directories via NFS. Clear out all that cruft.
*/
set_attr: set_attr:
if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID|ATTR_MODE)) != 0) { err2 = nfsd_create_setattr(rqstp, resfhp, iap);
__be32 err2 = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0); if (err2)
if (err2) err = err2;
err = err2;
}
/* /*
* Update the filehandle to get the new inode info. * Update the filehandle to get the new inode info.

View File

@ -173,14 +173,17 @@ void nlmclnt_next_cookie(struct nlm_cookie *);
/* /*
* Host cache * Host cache
*/ */
struct nlm_host * nlmclnt_lookup_host(const struct sockaddr_in *, int, int, const char *, int); struct nlm_host *nlmclnt_lookup_host(const struct sockaddr_in *, int, int,
struct nlm_host * nlmsvc_lookup_host(struct svc_rqst *, const char *, int); const char *, unsigned int);
struct nlm_host *nlmsvc_lookup_host(struct svc_rqst *, const char *,
unsigned int);
struct rpc_clnt * nlm_bind_host(struct nlm_host *); struct rpc_clnt * nlm_bind_host(struct nlm_host *);
void nlm_rebind_host(struct nlm_host *); void nlm_rebind_host(struct nlm_host *);
struct nlm_host * nlm_get_host(struct nlm_host *); struct nlm_host * nlm_get_host(struct nlm_host *);
void nlm_release_host(struct nlm_host *); void nlm_release_host(struct nlm_host *);
void nlm_shutdown_hosts(void); void nlm_shutdown_hosts(void);
extern void nlm_host_rebooted(const struct sockaddr_in *, const char *, int, u32); extern void nlm_host_rebooted(const struct sockaddr_in *, const char *,
unsigned int, u32);
void nsm_release(struct nsm_handle *); void nsm_release(struct nsm_handle *);

View File

@ -29,7 +29,7 @@ struct svc_rqst;
/* Lock info passed via NLM */ /* Lock info passed via NLM */
struct nlm_lock { struct nlm_lock {
char * caller; char * caller;
int len; /* length of "caller" */ unsigned int len; /* length of "caller" */
struct nfs_fh fh; struct nfs_fh fh;
struct xdr_netobj oh; struct xdr_netobj oh;
u32 svid; u32 svid;
@ -78,7 +78,7 @@ struct nlm_res {
*/ */
struct nlm_reboot { struct nlm_reboot {
char * mon; char * mon;
int len; unsigned int len;
u32 state; u32 state;
__be32 addr; __be32 addr;
__be32 vers; __be32 vers;

View File

@ -4,4 +4,3 @@ unifdef-y += stats.h
unifdef-y += syscall.h unifdef-y += syscall.h
unifdef-y += nfsfh.h unifdef-y += nfsfh.h
unifdef-y += debug.h unifdef-y += debug.h
unifdef-y += auth.h

View File

@ -72,8 +72,8 @@ enum {
*/ */
#define RC_DELAY (HZ/5) #define RC_DELAY (HZ/5)
void nfsd_cache_init(void); int nfsd_reply_cache_init(void);
void nfsd_cache_shutdown(void); void nfsd_reply_cache_shutdown(void);
int nfsd_cache_lookup(struct svc_rqst *, int); int nfsd_cache_lookup(struct svc_rqst *, int);
void nfsd_cache_update(struct svc_rqst *, int, __be32 *); void nfsd_cache_update(struct svc_rqst *, int, __be32 *);

View File

@ -122,7 +122,7 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp);
/* /*
* Function declarations * Function declarations
*/ */
void nfsd_export_init(void); int nfsd_export_init(void);
void nfsd_export_shutdown(void); void nfsd_export_shutdown(void);
void nfsd_export_flush(void); void nfsd_export_flush(void);
void exp_readlock(void); void exp_readlock(void);

View File

@ -20,7 +20,6 @@
#include <linux/nfsd/debug.h> #include <linux/nfsd/debug.h>
#include <linux/nfsd/nfsfh.h> #include <linux/nfsd/nfsfh.h>
#include <linux/nfsd/export.h> #include <linux/nfsd/export.h>
#include <linux/nfsd/auth.h>
#include <linux/nfsd/stats.h> #include <linux/nfsd/stats.h>
/* /*
* nfsd version * nfsd version
@ -70,9 +69,9 @@ void nfsd_racache_shutdown(void);
int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
struct svc_export **expp); struct svc_export **expp);
__be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *, __be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *,
const char *, int, struct svc_fh *); const char *, unsigned int, struct svc_fh *);
__be32 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *, __be32 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *,
const char *, int, const char *, unsigned int,
struct svc_export **, struct dentry **); struct svc_export **, struct dentry **);
__be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *, __be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *,
struct iattr *, int, time_t); struct iattr *, int, time_t);

View File

@ -18,7 +18,6 @@
#include <linux/nfsd/const.h> #include <linux/nfsd/const.h>
#include <linux/nfsd/export.h> #include <linux/nfsd/export.h>
#include <linux/nfsd/nfsfh.h> #include <linux/nfsd/nfsfh.h>
#include <linux/nfsd/auth.h>
/* /*
* Version of the syscall interface * Version of the syscall interface

View File

@ -23,7 +23,7 @@ struct nfsd_sattrargs {
struct nfsd_diropargs { struct nfsd_diropargs {
struct svc_fh fh; struct svc_fh fh;
char * name; char * name;
int len; unsigned int len;
}; };
struct nfsd_readargs { struct nfsd_readargs {
@ -43,17 +43,17 @@ struct nfsd_writeargs {
struct nfsd_createargs { struct nfsd_createargs {
struct svc_fh fh; struct svc_fh fh;
char * name; char * name;
int len; unsigned int len;
struct iattr attrs; struct iattr attrs;
}; };
struct nfsd_renameargs { struct nfsd_renameargs {
struct svc_fh ffh; struct svc_fh ffh;
char * fname; char * fname;
int flen; unsigned int flen;
struct svc_fh tfh; struct svc_fh tfh;
char * tname; char * tname;
int tlen; unsigned int tlen;
}; };
struct nfsd_readlinkargs { struct nfsd_readlinkargs {
@ -65,15 +65,15 @@ struct nfsd_linkargs {
struct svc_fh ffh; struct svc_fh ffh;
struct svc_fh tfh; struct svc_fh tfh;
char * tname; char * tname;
int tlen; unsigned int tlen;
}; };
struct nfsd_symlinkargs { struct nfsd_symlinkargs {
struct svc_fh ffh; struct svc_fh ffh;
char * fname; char * fname;
int flen; unsigned int flen;
char * tname; char * tname;
int tlen; unsigned int tlen;
struct iattr attrs; struct iattr attrs;
}; };

View File

@ -21,7 +21,7 @@ struct nfsd3_sattrargs {
struct nfsd3_diropargs { struct nfsd3_diropargs {
struct svc_fh fh; struct svc_fh fh;
char * name; char * name;
int len; unsigned int len;
}; };
struct nfsd3_accessargs { struct nfsd3_accessargs {
@ -48,7 +48,7 @@ struct nfsd3_writeargs {
struct nfsd3_createargs { struct nfsd3_createargs {
struct svc_fh fh; struct svc_fh fh;
char * name; char * name;
int len; unsigned int len;
int createmode; int createmode;
struct iattr attrs; struct iattr attrs;
__be32 * verf; __be32 * verf;
@ -57,7 +57,7 @@ struct nfsd3_createargs {
struct nfsd3_mknodargs { struct nfsd3_mknodargs {
struct svc_fh fh; struct svc_fh fh;
char * name; char * name;
int len; unsigned int len;
__u32 ftype; __u32 ftype;
__u32 major, minor; __u32 major, minor;
struct iattr attrs; struct iattr attrs;
@ -66,10 +66,10 @@ struct nfsd3_mknodargs {
struct nfsd3_renameargs { struct nfsd3_renameargs {
struct svc_fh ffh; struct svc_fh ffh;
char * fname; char * fname;
int flen; unsigned int flen;
struct svc_fh tfh; struct svc_fh tfh;
char * tname; char * tname;
int tlen; unsigned int tlen;
}; };
struct nfsd3_readlinkargs { struct nfsd3_readlinkargs {
@ -81,15 +81,15 @@ struct nfsd3_linkargs {
struct svc_fh ffh; struct svc_fh ffh;
struct svc_fh tfh; struct svc_fh tfh;
char * tname; char * tname;
int tlen; unsigned int tlen;
}; };
struct nfsd3_symlinkargs { struct nfsd3_symlinkargs {
struct svc_fh ffh; struct svc_fh ffh;
char * fname; char * fname;
int flen; unsigned int flen;
char * tname; char * tname;
int tlen; unsigned int tlen;
struct iattr attrs; struct iattr attrs;
}; };

View File

@ -441,7 +441,7 @@ void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *);
void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op); void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op);
__be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, __be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
struct dentry *dentry, __be32 *buffer, int *countp, struct dentry *dentry, __be32 *buffer, int *countp,
u32 *bmval, struct svc_rqst *); u32 *bmval, struct svc_rqst *, int ignore_crossmnt);
extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp, extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp,
struct nfsd4_compound_state *, struct nfsd4_compound_state *,
struct nfsd4_setclientid *setclid); struct nfsd4_setclientid *setclid);

View File

@ -44,11 +44,16 @@
#define IDMAP_NAMESZ 128 #define IDMAP_NAMESZ 128
#ifdef CONFIG_NFSD_V4 #ifdef CONFIG_NFSD_V4
void nfsd_idmap_init(void); int nfsd_idmap_init(void);
void nfsd_idmap_shutdown(void); void nfsd_idmap_shutdown(void);
#else #else
static inline void nfsd_idmap_init(void) {}; static inline int nfsd_idmap_init(void)
static inline void nfsd_idmap_shutdown(void) {}; {
return 0;
}
static inline void nfsd_idmap_shutdown(void)
{
}
#endif #endif
int nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, __u32 *); int nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, __u32 *);

View File

@ -169,8 +169,8 @@ extern int cache_check(struct cache_detail *detail,
extern void cache_flush(void); extern void cache_flush(void);
extern void cache_purge(struct cache_detail *detail); extern void cache_purge(struct cache_detail *detail);
#define NEVER (0x7FFFFFFF) #define NEVER (0x7FFFFFFF)
extern void cache_register(struct cache_detail *cd); extern int cache_register(struct cache_detail *cd);
extern int cache_unregister(struct cache_detail *cd); extern void cache_unregister(struct cache_detail *cd);
extern void qword_add(char **bpp, int *lp, char *str); extern void qword_add(char **bpp, int *lp, char *str);
extern void qword_addhex(char **bpp, int *lp, char *buf, int blen); extern void qword_addhex(char **bpp, int *lp, char *buf, int blen);

View File

@ -20,7 +20,7 @@
#define RPCDBG_BIND 0x0020 #define RPCDBG_BIND 0x0020
#define RPCDBG_SCHED 0x0040 #define RPCDBG_SCHED 0x0040
#define RPCDBG_TRANS 0x0080 #define RPCDBG_TRANS 0x0080
#define RPCDBG_SVCSOCK 0x0100 #define RPCDBG_SVCXPRT 0x0100
#define RPCDBG_SVCDSP 0x0200 #define RPCDBG_SVCDSP 0x0200
#define RPCDBG_MISC 0x0400 #define RPCDBG_MISC 0x0400
#define RPCDBG_CACHE 0x0800 #define RPCDBG_CACHE 0x0800

View File

@ -204,7 +204,7 @@ union svc_addr_u {
struct svc_rqst { struct svc_rqst {
struct list_head rq_list; /* idle list */ struct list_head rq_list; /* idle list */
struct list_head rq_all; /* all threads list */ struct list_head rq_all; /* all threads list */
struct svc_sock * rq_sock; /* socket */ struct svc_xprt * rq_xprt; /* transport ptr */
struct sockaddr_storage rq_addr; /* peer address */ struct sockaddr_storage rq_addr; /* peer address */
size_t rq_addrlen; size_t rq_addrlen;
@ -214,9 +214,10 @@ struct svc_rqst {
struct auth_ops * rq_authop; /* authentication flavour */ struct auth_ops * rq_authop; /* authentication flavour */
u32 rq_flavor; /* pseudoflavor */ u32 rq_flavor; /* pseudoflavor */
struct svc_cred rq_cred; /* auth info */ struct svc_cred rq_cred; /* auth info */
struct sk_buff * rq_skbuff; /* fast recv inet buffer */ void * rq_xprt_ctxt; /* transport specific context ptr */
struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */ struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */
size_t rq_xprt_hlen; /* xprt header len */
struct xdr_buf rq_arg; struct xdr_buf rq_arg;
struct xdr_buf rq_res; struct xdr_buf rq_res;
struct page * rq_pages[RPCSVC_MAXPAGES]; struct page * rq_pages[RPCSVC_MAXPAGES];
@ -317,11 +318,12 @@ static inline void svc_free_res_pages(struct svc_rqst *rqstp)
struct svc_deferred_req { struct svc_deferred_req {
u32 prot; /* protocol (UDP or TCP) */ u32 prot; /* protocol (UDP or TCP) */
struct svc_sock *svsk; struct svc_xprt *xprt;
struct sockaddr_storage addr; /* where reply must go */ struct sockaddr_storage addr; /* where reply must go */
size_t addrlen; size_t addrlen;
union svc_addr_u daddr; /* where reply must come from */ union svc_addr_u daddr; /* where reply must come from */
struct cache_deferred_req handle; struct cache_deferred_req handle;
size_t xprt_hlen;
int argslen; int argslen;
__be32 args[0]; __be32 args[0];
}; };
@ -382,6 +384,8 @@ struct svc_procedure {
*/ */
struct svc_serv * svc_create(struct svc_program *, unsigned int, struct svc_serv * svc_create(struct svc_program *, unsigned int,
void (*shutdown)(struct svc_serv*)); void (*shutdown)(struct svc_serv*));
struct svc_rqst *svc_prepare_thread(struct svc_serv *serv,
struct svc_pool *pool);
int svc_create_thread(svc_thread_fn, struct svc_serv *); int svc_create_thread(svc_thread_fn, struct svc_serv *);
void svc_exit_thread(struct svc_rqst *); void svc_exit_thread(struct svc_rqst *);
struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int,

View File

@ -0,0 +1,262 @@
/*
* Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the BSD-type
* license below:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* Neither the name of the Network Appliance, Inc. nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Author: Tom Tucker <tom@opengridcomputing.com>
*/
#ifndef SVC_RDMA_H
#define SVC_RDMA_H
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#define SVCRDMA_DEBUG
/* RPC/RDMA parameters and stats */
extern unsigned int svcrdma_ord;
extern unsigned int svcrdma_max_requests;
extern unsigned int svcrdma_max_req_size;
extern atomic_t rdma_stat_recv;
extern atomic_t rdma_stat_read;
extern atomic_t rdma_stat_write;
extern atomic_t rdma_stat_sq_starve;
extern atomic_t rdma_stat_rq_starve;
extern atomic_t rdma_stat_rq_poll;
extern atomic_t rdma_stat_rq_prod;
extern atomic_t rdma_stat_sq_poll;
extern atomic_t rdma_stat_sq_prod;
#define RPCRDMA_VERSION 1
/*
* Contexts are built when an RDMA request is created and are a
* record of the resources that can be recovered when the request
* completes.
*
* Field notes marked "presumably" are inferred from names and types only;
* confirm them against the transport implementation.
*/
struct svc_rdma_op_ctxt {
struct svc_rdma_op_ctxt *next; /* link to another context; presumably the list headed by svcxprt_rdma.sc_ctxt_head */
struct xdr_buf arg;
struct list_head dto_q; /* list linkage; presumably for the transport's DTO queues */
enum ib_wr_opcode wr_op;
enum ib_wc_status wc_status;
u32 byte_len;
struct svcxprt_rdma *xprt; /* RDMA transport associated with this context */
unsigned long flags; /* presumably takes the RDMACTXT_F_* values -- confirm */
enum dma_data_direction direction;
int count; /* presumably the number of sge[]/pages[] entries in use -- confirm */
struct ib_sge sge[RPCSVC_MAXPAGES];
struct page *pages[RPCSVC_MAXPAGES];
};
#define RDMACTXT_F_READ_DONE 1
#define RDMACTXT_F_LAST_CTXT 2
/*
* RDMA server transport instance. The generic struct svc_xprt is embedded
* as the first member (sc_xprt); the remaining members hold the RDMA
* specific state.
*/
struct svcxprt_rdma {
struct svc_xprt sc_xprt; /* SVC transport structure */
struct rdma_cm_id *sc_cm_id; /* RDMA connection id */
struct list_head sc_accept_q; /* Conn. waiting accept */
int sc_ord; /* RDMA read limit */
wait_queue_head_t sc_read_wait;
int sc_max_sge;
int sc_sq_depth; /* Depth of SQ */
atomic_t sc_sq_count; /* Number of SQ WR on queue */
int sc_max_requests; /* Depth of RQ */
int sc_max_req_size; /* Size of each RQ WR buf */
struct ib_pd *sc_pd;
struct svc_rdma_op_ctxt *sc_ctxt_head; /* head of a list of svc_rdma_op_ctxt linked via ->next */
int sc_ctxt_cnt;
int sc_ctxt_bump;
int sc_ctxt_max;
spinlock_t sc_ctxt_lock; /* presumably guards the sc_ctxt_* members -- confirm */
struct list_head sc_rq_dto_q;
spinlock_t sc_rq_dto_lock; /* presumably guards sc_rq_dto_q -- confirm */
struct ib_qp *sc_qp;
struct ib_cq *sc_rq_cq;
struct ib_cq *sc_sq_cq;
struct ib_mr *sc_phys_mr; /* MR for server memory */
spinlock_t sc_lock; /* transport lock */
wait_queue_head_t sc_send_wait; /* SQ exhaustion waitlist */
unsigned long sc_flags; /* presumably takes the RDMAXPRT_* values -- confirm */
struct list_head sc_dto_q; /* DTO tasklet I/O pending Q */
struct list_head sc_read_complete_q;
spinlock_t sc_read_complete_lock; /* presumably guards sc_read_complete_q -- confirm */
};
/* sc_flags */
#define RDMAXPRT_RQ_PENDING 1
#define RDMAXPRT_SQ_PENDING 2
#define RDMAXPRT_CONN_PENDING 3
#define RPCRDMA_LISTEN_BACKLOG 10
/* The default ORD value is based on two outstanding full-size writes with a
* page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ. */
#define RPCRDMA_ORD (64/4)
#define RPCRDMA_SQ_DEPTH_MULT 8
#define RPCRDMA_MAX_THREADS 16
#define RPCRDMA_MAX_REQUESTS 16
#define RPCRDMA_MAX_REQ_SIZE 4096
/* svc_rdma_marshal.c */
extern void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *,
int *, int *);
extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *);
extern int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *);
extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *,
struct rpcrdma_msg *,
enum rpcrdma_errcode, u32 *);
extern void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *, int);
extern void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *, int);
extern void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *, int,
u32, u64, u32);
extern void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *,
struct rpcrdma_msg *,
struct rpcrdma_msg *,
enum rpcrdma_proc);
extern int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *);
/* svc_rdma_recvfrom.c */
extern int svc_rdma_recvfrom(struct svc_rqst *);
/* svc_rdma_sendto.c */
extern int svc_rdma_sendto(struct svc_rqst *);
/* svc_rdma_transport.c */
extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *);
extern int svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *,
enum rpcrdma_errcode);
struct page *svc_rdma_get_page(void);
extern int svc_rdma_post_recv(struct svcxprt_rdma *);
extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int);
extern void svc_sq_reap(struct svcxprt_rdma *);
extern void svc_rq_reap(struct svcxprt_rdma *);
extern struct svc_xprt_class svc_rdma_class;
extern void svc_rdma_prep_reply_hdr(struct svc_rqst *);
/* svc_rdma.c */
extern int svc_rdma_init(void);
extern void svc_rdma_cleanup(void);
/*
 * Return a pointer to the first read chunk of the RPC/RDMA header, or
 * NULL when that first entry's discriminator (rc_discrim) is zero, i.e.
 * no read chunk is present.
 */
static inline struct rpcrdma_read_chunk *
svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp)
{
	struct rpcrdma_read_chunk *first;

	/* The read list, if any, begins at the start of the chunk area. */
	first = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];

	return first->rc_discrim ? first : NULL;
}
/*
 * Return a pointer to the write array, or NULL if no write list is
 * present. A write array is reported only when rm_chunks[0] is zero and
 * rm_chunks[1] is non-zero; in that case the array starts at rm_chunks[1].
 */
static inline struct rpcrdma_write_array *
svc_rdma_get_write_array(struct rpcrdma_msg *rmsgp)
{
	int have_write_list = rmsgp->rm_body.rm_chunks[0] == 0 &&
			      rmsgp->rm_body.rm_chunks[1] != 0;

	if (!have_write_list)
		return NULL;

	return (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[1];
}
/*
* Returns the address of the first reply array element or NULL if no
* reply array is present.
*
* As written, the function bails out with NULL unless both rm_chunks[0]
* and rm_chunks[1] are zero, so in practice only the "no read list, no
* write list" layout (reply array at rm_chunks[2]) is handled.
*/
static inline struct rpcrdma_write_array *
svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp)
{
struct rpcrdma_read_chunk *rch;
struct rpcrdma_write_array *wr_ary;
struct rpcrdma_write_array *rp_ary;
/* XXX: Need to fix when reply list may occur with read-list and/or
* write list */
if (rmsgp->rm_body.rm_chunks[0] != 0 ||
rmsgp->rm_body.rm_chunks[1] != 0)
return NULL;
/* NOTE(review): rm_chunks[0] is zero here, so if rc_discrim overlays
* rm_chunks[0] (struct layout not visible here -- confirm) this lookup
* always yields NULL and the branch below is dead code.
*/
rch = svc_rdma_get_read_chunk(rmsgp);
if (rch) {
/* Skip to the read list terminator (entry with a zero discriminator) */
while (rch->rc_discrim)
rch++;
/* The reply list follows an empty write array located
* at 'rc_position' here. The reply array is at rc_target.
*/
rp_ary = (struct rpcrdma_write_array *)&rch->rc_target;
goto found_it;
}
/* NOTE(review): svc_rdma_get_write_array() returns NULL whenever
* rm_chunks[1] is zero, which the check above guarantees, so this
* branch cannot be taken as the code stands.
*/
wr_ary = svc_rdma_get_write_array(rmsgp);
if (wr_ary) {
rp_ary = (struct rpcrdma_write_array *)
&wr_ary->
wc_array[wr_ary->wc_nchunks].wc_target.rs_length;
goto found_it;
}
/* No read list, no write list */
rp_ary = (struct rpcrdma_write_array *)
&rmsgp->rm_body.rm_chunks[2];
found_it:
/* A zero discriminator means the reply array itself is absent */
if (rp_ary->wc_discrim == 0)
return NULL;
return rp_ary;
}
#endif

View File

@ -0,0 +1,159 @@
/*
* linux/include/linux/sunrpc/svc_xprt.h
*
* RPC server transport I/O
*/
#ifndef SUNRPC_SVC_XPRT_H
#define SUNRPC_SVC_XPRT_H
#include <linux/sunrpc/svc.h>
#include <linux/module.h>
/*
* Table of operations supplied by a transport implementation. It is
* referenced from both svc_xprt_class.xcl_ops and svc_xprt.xpt_ops.
*
* The per-member notes below are inferred from the member names and
* signatures only -- confirm against the callers before relying on them.
*/
struct svc_xprt_ops {
/* presumably creates a transport for the given service and address */
struct svc_xprt *(*xpo_create)(struct svc_serv *,
struct sockaddr *, int,
int);
/* presumably accepts a connection on a listener, returning the new transport */
struct svc_xprt *(*xpo_accept)(struct svc_xprt *);
int (*xpo_has_wspace)(struct svc_xprt *);
int (*xpo_recvfrom)(struct svc_rqst *);
void (*xpo_prep_reply_hdr)(struct svc_rqst *);
int (*xpo_sendto)(struct svc_rqst *);
void (*xpo_release_rqst)(struct svc_rqst *);
void (*xpo_detach)(struct svc_xprt *);
void (*xpo_free)(struct svc_xprt *);
};
/*
* Describes one transport type: its name, owning module, operations
* table and maximum payload size.
*/
struct svc_xprt_class {
const char *xcl_name;
struct module *xcl_owner;
struct svc_xprt_ops *xcl_ops;
struct list_head xcl_list; /* list linkage; presumably used by svc_reg_xprt_class() -- confirm */
u32 xcl_max_payload;
};
/*
* Generic (transport independent) state of an RPC server transport.
* Transport implementations embed this structure in their own private
* structure (e.g. struct svcxprt_rdma embeds it as sc_xprt).
*/
struct svc_xprt {
struct svc_xprt_class *xpt_class;
struct svc_xprt_ops *xpt_ops;
struct kref xpt_ref; /* reference count, see svc_xprt_get() */
struct list_head xpt_list;
struct list_head xpt_ready;
unsigned long xpt_flags;
/* Values for xpt_flags (presumably bit numbers, not masks); 5 is unassigned */
#define XPT_BUSY 0 /* enqueued/receiving */
#define XPT_CONN 1 /* conn pending */
#define XPT_CLOSE 2 /* dead or dying */
#define XPT_DATA 3 /* data pending */
#define XPT_TEMP 4 /* connected transport */
#define XPT_DEAD 6 /* transport closed */
#define XPT_CHNGBUF 7 /* need to change snd/rcv buf sizes */
#define XPT_DEFERRED 8 /* deferred request pending */
#define XPT_OLD 9 /* used for xprt aging mark+sweep */
#define XPT_DETACHED 10 /* detached from tempsocks list */
#define XPT_LISTENER 11 /* listening endpoint */
#define XPT_CACHE_AUTH 12 /* cache auth info */
struct svc_pool *xpt_pool; /* current pool iff queued */
struct svc_serv *xpt_server; /* service for transport */
atomic_t xpt_reserved; /* space on outq that is rsvd */
struct mutex xpt_mutex; /* to serialize sending data */
spinlock_t xpt_lock; /* protects xpt_deferred
* and xpt_auth_cache */
void *xpt_auth_cache;/* auth cache */
struct list_head xpt_deferred; /* deferred requests that need
* to be revisited */
struct sockaddr_storage xpt_local; /* local address */
size_t xpt_locallen; /* length of address */
struct sockaddr_storage xpt_remote; /* remote peer's address */
size_t xpt_remotelen; /* length of address */
};
int svc_reg_xprt_class(struct svc_xprt_class *);
void svc_unreg_xprt_class(struct svc_xprt_class *);
void svc_xprt_init(struct svc_xprt_class *, struct svc_xprt *,
struct svc_serv *);
int svc_create_xprt(struct svc_serv *, char *, unsigned short, int);
void svc_xprt_enqueue(struct svc_xprt *xprt);
void svc_xprt_received(struct svc_xprt *);
void svc_xprt_put(struct svc_xprt *xprt);
void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt);
void svc_close_xprt(struct svc_xprt *xprt);
void svc_delete_xprt(struct svc_xprt *xprt);
int svc_port_is_privileged(struct sockaddr *sin);
int svc_print_xprts(char *buf, int maxlen);
struct svc_xprt *svc_find_xprt(struct svc_serv *, char *, int, int);
int svc_xprt_names(struct svc_serv *serv, char *buf, int buflen);
/* Take an additional reference on @xprt by incrementing its xpt_ref kref. */
static inline void svc_xprt_get(struct svc_xprt *xprt)
{
kref_get(&xprt->xpt_ref);
}
/*
 * Record the transport's local address: copy @salen bytes from @sa into
 * xpt_local and remember the length in xpt_locallen.
 *
 * @salen is not validated here; the caller must ensure it does not exceed
 * sizeof(struct sockaddr_storage).
 */
static inline void svc_xprt_set_local(struct svc_xprt *xprt,
				      struct sockaddr *sa, int salen)
{
	struct sockaddr_storage *dst = &xprt->xpt_local;

	memcpy(dst, sa, salen);
	xprt->xpt_locallen = salen;
}
/*
 * Record the peer's address: copy @salen bytes from @sa into xpt_remote
 * and remember the length in xpt_remotelen.
 *
 * @salen is not validated here; the caller must ensure it does not exceed
 * sizeof(struct sockaddr_storage).
 */
static inline void svc_xprt_set_remote(struct svc_xprt *xprt,
				       struct sockaddr *sa, int salen)
{
	struct sockaddr_storage *dst = &xprt->xpt_remote;

	memcpy(dst, sa, salen);
	xprt->xpt_remotelen = salen;
}
/*
 * Return the port number stored in @sa, in host byte order.
 * Only AF_INET and AF_INET6 addresses are understood; any other address
 * family yields 0.
 */
static inline unsigned short svc_addr_port(struct sockaddr *sa)
{
	if (sa->sa_family == AF_INET)
		return ntohs(((struct sockaddr_in *)sa)->sin_port);

	if (sa->sa_family == AF_INET6)
		return ntohs(((struct sockaddr_in6 *)sa)->sin6_port);

	return 0;
}
/*
 * Return the size of the concrete socket address structure matching the
 * address family of @sa (sockaddr_in for AF_INET, sockaddr_in6 for
 * AF_INET6).
 *
 * NOTE(review): for any other family -EAFNOSUPPORT is returned through an
 * unsigned size_t, so the caller sees a very large positive value and a
 * "< 0" test on the result can never be true. Callers must compare against
 * (size_t)-EAFNOSUPPORT or validate the family beforehand.
 */
static inline size_t svc_addr_len(struct sockaddr *sa)
{
	if (sa->sa_family == AF_INET)
		return sizeof(struct sockaddr_in);

	if (sa->sa_family == AF_INET6)
		return sizeof(struct sockaddr_in6);

	return -EAFNOSUPPORT;
}
/* Return the port of the transport's local address (xpt_local), host order. */
static inline unsigned short svc_xprt_local_port(struct svc_xprt *xprt)
{
	struct sockaddr *local = (struct sockaddr *)&xprt->xpt_local;

	return svc_addr_port(local);
}
/* Return the port of the transport's peer address (xpt_remote), host order. */
static inline unsigned short svc_xprt_remote_port(struct svc_xprt *xprt)
{
	struct sockaddr *remote = (struct sockaddr *)&xprt->xpt_remote;

	return svc_addr_port(remote);
}
/*
 * Format @addr as "<address>, port=<port>" into @buf (at most @len bytes,
 * as bounded by snprintf) and return @buf.
 *
 * AF_INET addresses are printed as a dotted quad and AF_INET6 addresses as
 * eight colon-separated hex groups; any other family produces an
 * "unknown address type" message carrying the family number.
 */
static inline char *__svc_print_addr(struct sockaddr *addr,
				     char *buf, size_t len)
{
	if (addr->sa_family == AF_INET) {
		struct sockaddr_in *sin = (struct sockaddr_in *)addr;

		snprintf(buf, len, "%u.%u.%u.%u, port=%u",
			 NIPQUAD(sin->sin_addr),
			 ntohs(sin->sin_port));
	} else if (addr->sa_family == AF_INET6) {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;

		snprintf(buf, len, "%x:%x:%x:%x:%x:%x:%x:%x, port=%u",
			 NIP6(sin6->sin6_addr),
			 ntohs(sin6->sin6_port));
	} else {
		snprintf(buf, len, "unknown address type: %d", addr->sa_family);
	}

	return buf;
}
#endif /* SUNRPC_SVC_XPRT_H */

View File

@ -10,42 +10,16 @@
#define SUNRPC_SVCSOCK_H #define SUNRPC_SVCSOCK_H
#include <linux/sunrpc/svc.h> #include <linux/sunrpc/svc.h>
#include <linux/sunrpc/svc_xprt.h>
/* /*
* RPC server socket. * RPC server socket.
*/ */
struct svc_sock { struct svc_sock {
struct list_head sk_ready; /* list of ready sockets */ struct svc_xprt sk_xprt;
struct list_head sk_list; /* list of all sockets */
struct socket * sk_sock; /* berkeley socket layer */ struct socket * sk_sock; /* berkeley socket layer */
struct sock * sk_sk; /* INET layer */ struct sock * sk_sk; /* INET layer */
struct svc_pool * sk_pool; /* current pool iff queued */
struct svc_serv * sk_server; /* service for this socket */
atomic_t sk_inuse; /* use count */
unsigned long sk_flags;
#define SK_BUSY 0 /* enqueued/receiving */
#define SK_CONN 1 /* conn pending */
#define SK_CLOSE 2 /* dead or dying */
#define SK_DATA 3 /* data pending */
#define SK_TEMP 4 /* temp (TCP) socket */
#define SK_DEAD 6 /* socket closed */
#define SK_CHNGBUF 7 /* need to change snd/rcv buffer sizes */
#define SK_DEFERRED 8 /* request on sk_deferred */
#define SK_OLD 9 /* used for temp socket aging mark+sweep */
#define SK_DETACHED 10 /* detached from tempsocks list */
atomic_t sk_reserved; /* space on outq that is reserved */
spinlock_t sk_lock; /* protects sk_deferred and
* sk_info_authunix */
struct list_head sk_deferred; /* deferred requests that need to
* be revisted */
struct mutex sk_mutex; /* to serialize sending data */
int (*sk_recvfrom)(struct svc_rqst *rqstp);
int (*sk_sendto)(struct svc_rqst *rqstp);
/* We keep the old state_change and data_ready CB's here */ /* We keep the old state_change and data_ready CB's here */
void (*sk_ostate)(struct sock *); void (*sk_ostate)(struct sock *);
void (*sk_odata)(struct sock *, int bytes); void (*sk_odata)(struct sock *, int bytes);
@ -54,21 +28,12 @@ struct svc_sock {
/* private TCP part */ /* private TCP part */
int sk_reclen; /* length of record */ int sk_reclen; /* length of record */
int sk_tcplen; /* current read length */ int sk_tcplen; /* current read length */
time_t sk_lastrecv; /* time of last received request */
/* cache of various info for TCP sockets */
void *sk_info_authunix;
struct sockaddr_storage sk_local; /* local address */
struct sockaddr_storage sk_remote; /* remote peer's address */
int sk_remotelen; /* length of address */
}; };
/* /*
* Function prototypes. * Function prototypes.
*/ */
int svc_makesock(struct svc_serv *, int, unsigned short, int flags); void svc_close_all(struct list_head *);
void svc_force_close_socket(struct svc_sock *);
int svc_recv(struct svc_rqst *, long); int svc_recv(struct svc_rqst *, long);
int svc_send(struct svc_rqst *); int svc_send(struct svc_rqst *);
void svc_drop(struct svc_rqst *); void svc_drop(struct svc_rqst *);
@ -78,6 +43,8 @@ int svc_addsock(struct svc_serv *serv,
int fd, int fd,
char *name_return, char *name_return,
int *proto); int *proto);
void svc_init_xprt_sock(void);
void svc_cleanup_xprt_sock(void);
/* /*
* svc_makesock socket characteristics * svc_makesock socket characteristics

View File

@ -112,7 +112,8 @@ struct xdr_buf {
__be32 *xdr_encode_opaque_fixed(__be32 *p, const void *ptr, unsigned int len); __be32 *xdr_encode_opaque_fixed(__be32 *p, const void *ptr, unsigned int len);
__be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int len); __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int len);
__be32 *xdr_encode_string(__be32 *p, const char *s); __be32 *xdr_encode_string(__be32 *p, const char *s);
__be32 *xdr_decode_string_inplace(__be32 *p, char **sp, int *lenp, int maxlen); __be32 *xdr_decode_string_inplace(__be32 *p, char **sp, unsigned int *lenp,
unsigned int maxlen);
__be32 *xdr_encode_netobj(__be32 *p, const struct xdr_netobj *); __be32 *xdr_encode_netobj(__be32 *p, const struct xdr_netobj *);
__be32 *xdr_decode_netobj(__be32 *p, struct xdr_netobj *); __be32 *xdr_decode_netobj(__be32 *p, struct xdr_netobj *);

View File

@ -11,6 +11,7 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
auth.o auth_null.o auth_unix.o \ auth.o auth_null.o auth_unix.o \
svc.o svcsock.o svcauth.o svcauth_unix.o \ svc.o svcsock.o svcauth.o svcauth_unix.o \
rpcb_clnt.o timer.o xdr.o \ rpcb_clnt.o timer.o xdr.o \
sunrpc_syms.o cache.o rpc_pipe.o sunrpc_syms.o cache.o rpc_pipe.o \
svc_xprt.o
sunrpc-$(CONFIG_PROC_FS) += stats.o sunrpc-$(CONFIG_PROC_FS) += stats.o
sunrpc-$(CONFIG_SYSCTL) += sysctl.o sunrpc-$(CONFIG_SYSCTL) += sysctl.o

View File

@ -224,38 +224,34 @@ static int rsi_parse(struct cache_detail *cd,
/* major/minor */ /* major/minor */
len = qword_get(&mesg, buf, mlen); len = qword_get(&mesg, buf, mlen);
if (len <= 0)
goto out;
rsii.major_status = simple_strtoul(buf, &ep, 10);
if (*ep)
goto out;
len = qword_get(&mesg, buf, mlen);
if (len <= 0)
goto out;
rsii.minor_status = simple_strtoul(buf, &ep, 10);
if (*ep)
goto out;
/* out_handle */
len = qword_get(&mesg, buf, mlen);
if (len < 0) if (len < 0)
goto out; goto out;
if (len == 0) { status = -ENOMEM;
if (dup_to_netobj(&rsii.out_handle, buf, len))
goto out; goto out;
} else {
rsii.major_status = simple_strtoul(buf, &ep, 10);
if (*ep)
goto out;
len = qword_get(&mesg, buf, mlen);
if (len <= 0)
goto out;
rsii.minor_status = simple_strtoul(buf, &ep, 10);
if (*ep)
goto out;
/* out_handle */ /* out_token */
len = qword_get(&mesg, buf, mlen); len = qword_get(&mesg, buf, mlen);
if (len < 0) status = -EINVAL;
goto out; if (len < 0)
status = -ENOMEM; goto out;
if (dup_to_netobj(&rsii.out_handle, buf, len)) status = -ENOMEM;
goto out; if (dup_to_netobj(&rsii.out_token, buf, len))
goto out;
/* out_token */
len = qword_get(&mesg, buf, mlen);
status = -EINVAL;
if (len < 0)
goto out;
status = -ENOMEM;
if (dup_to_netobj(&rsii.out_token, buf, len))
goto out;
}
rsii.h.expiry_time = expiry; rsii.h.expiry_time = expiry;
rsip = rsi_update(&rsii, rsip); rsip = rsi_update(&rsii, rsip);
status = 0; status = 0;
@ -975,6 +971,7 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp,
struct kvec *resv = &rqstp->rq_res.head[0]; struct kvec *resv = &rqstp->rq_res.head[0];
struct xdr_netobj tmpobj; struct xdr_netobj tmpobj;
struct rsi *rsip, rsikey; struct rsi *rsip, rsikey;
int ret;
/* Read the verifier; should be NULL: */ /* Read the verifier; should be NULL: */
*authp = rpc_autherr_badverf; *authp = rpc_autherr_badverf;
@ -1014,23 +1011,27 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp,
/* No upcall result: */ /* No upcall result: */
return SVC_DROP; return SVC_DROP;
case 0: case 0:
ret = SVC_DROP;
/* Got an answer to the upcall; use it: */ /* Got an answer to the upcall; use it: */
if (gss_write_init_verf(rqstp, rsip)) if (gss_write_init_verf(rqstp, rsip))
return SVC_DROP; goto out;
if (resv->iov_len + 4 > PAGE_SIZE) if (resv->iov_len + 4 > PAGE_SIZE)
return SVC_DROP; goto out;
svc_putnl(resv, RPC_SUCCESS); svc_putnl(resv, RPC_SUCCESS);
if (svc_safe_putnetobj(resv, &rsip->out_handle)) if (svc_safe_putnetobj(resv, &rsip->out_handle))
return SVC_DROP; goto out;
if (resv->iov_len + 3 * 4 > PAGE_SIZE) if (resv->iov_len + 3 * 4 > PAGE_SIZE)
return SVC_DROP; goto out;
svc_putnl(resv, rsip->major_status); svc_putnl(resv, rsip->major_status);
svc_putnl(resv, rsip->minor_status); svc_putnl(resv, rsip->minor_status);
svc_putnl(resv, GSS_SEQ_WIN); svc_putnl(resv, GSS_SEQ_WIN);
if (svc_safe_putnetobj(resv, &rsip->out_token)) if (svc_safe_putnetobj(resv, &rsip->out_token))
return SVC_DROP; goto out;
} }
return SVC_COMPLETE; ret = SVC_COMPLETE;
out:
cache_put(&rsip->h, &rsi_cache);
return ret;
} }
/* /*
@ -1125,6 +1126,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
case RPC_GSS_PROC_DESTROY: case RPC_GSS_PROC_DESTROY:
if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq)) if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq))
goto auth_err; goto auth_err;
rsci->h.expiry_time = get_seconds();
set_bit(CACHE_NEGATIVE, &rsci->h.flags); set_bit(CACHE_NEGATIVE, &rsci->h.flags);
if (resv->iov_len + 4 > PAGE_SIZE) if (resv->iov_len + 4 > PAGE_SIZE)
goto drop; goto drop;
@ -1386,19 +1388,26 @@ int
gss_svc_init(void) gss_svc_init(void)
{ {
int rv = svc_auth_register(RPC_AUTH_GSS, &svcauthops_gss); int rv = svc_auth_register(RPC_AUTH_GSS, &svcauthops_gss);
if (rv == 0) { if (rv)
cache_register(&rsc_cache); return rv;
cache_register(&rsi_cache); rv = cache_register(&rsc_cache);
} if (rv)
goto out1;
rv = cache_register(&rsi_cache);
if (rv)
goto out2;
return 0;
out2:
cache_unregister(&rsc_cache);
out1:
svc_auth_unregister(RPC_AUTH_GSS);
return rv; return rv;
} }
void void
gss_svc_shutdown(void) gss_svc_shutdown(void)
{ {
if (cache_unregister(&rsc_cache)) cache_unregister(&rsc_cache);
printk(KERN_ERR "auth_rpcgss: failed to unregister rsc cache\n"); cache_unregister(&rsi_cache);
if (cache_unregister(&rsi_cache))
printk(KERN_ERR "auth_rpcgss: failed to unregister rsi cache\n");
svc_auth_unregister(RPC_AUTH_GSS); svc_auth_unregister(RPC_AUTH_GSS);
} }

View File

@ -245,6 +245,7 @@ int cache_check(struct cache_detail *detail,
cache_put(h, detail); cache_put(h, detail);
return rv; return rv;
} }
EXPORT_SYMBOL(cache_check);
/* /*
* caches need to be periodically cleaned. * caches need to be periodically cleaned.
@ -290,44 +291,78 @@ static const struct file_operations cache_flush_operations;
static void do_cache_clean(struct work_struct *work); static void do_cache_clean(struct work_struct *work);
static DECLARE_DELAYED_WORK(cache_cleaner, do_cache_clean); static DECLARE_DELAYED_WORK(cache_cleaner, do_cache_clean);
void cache_register(struct cache_detail *cd) static void remove_cache_proc_entries(struct cache_detail *cd)
{ {
if (cd->proc_ent == NULL)
return;
if (cd->flush_ent)
remove_proc_entry("flush", cd->proc_ent);
if (cd->channel_ent)
remove_proc_entry("channel", cd->proc_ent);
if (cd->content_ent)
remove_proc_entry("content", cd->proc_ent);
cd->proc_ent = NULL;
remove_proc_entry(cd->name, proc_net_rpc);
}
#ifdef CONFIG_PROC_FS
static int create_cache_proc_entries(struct cache_detail *cd)
{
struct proc_dir_entry *p;
cd->proc_ent = proc_mkdir(cd->name, proc_net_rpc); cd->proc_ent = proc_mkdir(cd->name, proc_net_rpc);
if (cd->proc_ent) { if (cd->proc_ent == NULL)
struct proc_dir_entry *p; goto out_nomem;
cd->proc_ent->owner = cd->owner; cd->proc_ent->owner = cd->owner;
cd->channel_ent = cd->content_ent = NULL; cd->channel_ent = cd->content_ent = NULL;
p = create_proc_entry("flush", S_IFREG|S_IRUSR|S_IWUSR, p = create_proc_entry("flush", S_IFREG|S_IRUSR|S_IWUSR, cd->proc_ent);
cd->flush_ent = p;
if (p == NULL)
goto out_nomem;
p->proc_fops = &cache_flush_operations;
p->owner = cd->owner;
p->data = cd;
if (cd->cache_request || cd->cache_parse) {
p = create_proc_entry("channel", S_IFREG|S_IRUSR|S_IWUSR,
cd->proc_ent); cd->proc_ent);
cd->flush_ent = p; cd->channel_ent = p;
if (p) { if (p == NULL)
p->proc_fops = &cache_flush_operations; goto out_nomem;
p->owner = cd->owner; p->proc_fops = &cache_file_operations;
p->data = cd; p->owner = cd->owner;
} p->data = cd;
if (cd->cache_request || cd->cache_parse) {
p = create_proc_entry("channel", S_IFREG|S_IRUSR|S_IWUSR,
cd->proc_ent);
cd->channel_ent = p;
if (p) {
p->proc_fops = &cache_file_operations;
p->owner = cd->owner;
p->data = cd;
}
}
if (cd->cache_show) {
p = create_proc_entry("content", S_IFREG|S_IRUSR|S_IWUSR,
cd->proc_ent);
cd->content_ent = p;
if (p) {
p->proc_fops = &content_file_operations;
p->owner = cd->owner;
p->data = cd;
}
}
} }
if (cd->cache_show) {
p = create_proc_entry("content", S_IFREG|S_IRUSR|S_IWUSR,
cd->proc_ent);
cd->content_ent = p;
if (p == NULL)
goto out_nomem;
p->proc_fops = &content_file_operations;
p->owner = cd->owner;
p->data = cd;
}
return 0;
out_nomem:
remove_cache_proc_entries(cd);
return -ENOMEM;
}
#else /* CONFIG_PROC_FS */
static int create_cache_proc_entries(struct cache_detail *cd)
{
return 0;
}
#endif
int cache_register(struct cache_detail *cd)
{
int ret;
ret = create_cache_proc_entries(cd);
if (ret)
return ret;
rwlock_init(&cd->hash_lock); rwlock_init(&cd->hash_lock);
INIT_LIST_HEAD(&cd->queue); INIT_LIST_HEAD(&cd->queue);
spin_lock(&cache_list_lock); spin_lock(&cache_list_lock);
@ -341,9 +376,11 @@ void cache_register(struct cache_detail *cd)
/* start the cleaning process */ /* start the cleaning process */
schedule_delayed_work(&cache_cleaner, 0); schedule_delayed_work(&cache_cleaner, 0);
return 0;
} }
EXPORT_SYMBOL(cache_register);
int cache_unregister(struct cache_detail *cd) void cache_unregister(struct cache_detail *cd)
{ {
cache_purge(cd); cache_purge(cd);
spin_lock(&cache_list_lock); spin_lock(&cache_list_lock);
@ -351,30 +388,23 @@ int cache_unregister(struct cache_detail *cd)
if (cd->entries || atomic_read(&cd->inuse)) { if (cd->entries || atomic_read(&cd->inuse)) {
write_unlock(&cd->hash_lock); write_unlock(&cd->hash_lock);
spin_unlock(&cache_list_lock); spin_unlock(&cache_list_lock);
return -EBUSY; goto out;
} }
if (current_detail == cd) if (current_detail == cd)
current_detail = NULL; current_detail = NULL;
list_del_init(&cd->others); list_del_init(&cd->others);
write_unlock(&cd->hash_lock); write_unlock(&cd->hash_lock);
spin_unlock(&cache_list_lock); spin_unlock(&cache_list_lock);
if (cd->proc_ent) { remove_cache_proc_entries(cd);
if (cd->flush_ent)
remove_proc_entry("flush", cd->proc_ent);
if (cd->channel_ent)
remove_proc_entry("channel", cd->proc_ent);
if (cd->content_ent)
remove_proc_entry("content", cd->proc_ent);
cd->proc_ent = NULL;
remove_proc_entry(cd->name, proc_net_rpc);
}
if (list_empty(&cache_list)) { if (list_empty(&cache_list)) {
/* module must be being unloaded so its safe to kill the worker */ /* module must be being unloaded so its safe to kill the worker */
cancel_delayed_work_sync(&cache_cleaner); cancel_delayed_work_sync(&cache_cleaner);
} }
return 0; return;
out:
printk(KERN_ERR "nfsd: failed to unregister %s cache\n", cd->name);
} }
EXPORT_SYMBOL(cache_unregister);
/* clean cache tries to find something to clean /* clean cache tries to find something to clean
* and cleans it. * and cleans it.
@ -489,6 +519,7 @@ void cache_flush(void)
while (cache_clean() != -1) while (cache_clean() != -1)
cond_resched(); cond_resched();
} }
EXPORT_SYMBOL(cache_flush);
void cache_purge(struct cache_detail *detail) void cache_purge(struct cache_detail *detail)
{ {
@ -497,7 +528,7 @@ void cache_purge(struct cache_detail *detail)
cache_flush(); cache_flush();
detail->flush_time = 1; detail->flush_time = 1;
} }
EXPORT_SYMBOL(cache_purge);
/* /*
@ -634,13 +665,13 @@ void cache_clean_deferred(void *owner)
/* /*
* communicate with user-space * communicate with user-space
* *
* We have a magic /proc file - /proc/sunrpc/cache * We have a magic /proc file - /proc/sunrpc/<cachename>/channel.
* On read, you get a full request, or block * On read, you get a full request, or block.
* On write, an update request is processed * On write, an update request is processed.
* Poll works if anything to read, and always allows write * Poll works if anything to read, and always allows write.
* *
* Implemented by linked list of requests. Each open file has * Implemented by linked list of requests. Each open file has
* a ->private that also exists in this list. New request are added * a ->private that also exists in this list. New requests are added
* to the end and may wakeup and preceding readers. * to the end and may wakeup and preceding readers.
* New readers are added to the head. If, on read, an item is found with * New readers are added to the head. If, on read, an item is found with
* CACHE_UPCALLING clear, we free it from the list. * CACHE_UPCALLING clear, we free it from the list.
@ -963,6 +994,7 @@ void qword_add(char **bpp, int *lp, char *str)
*bpp = bp; *bpp = bp;
*lp = len; *lp = len;
} }
EXPORT_SYMBOL(qword_add);
void qword_addhex(char **bpp, int *lp, char *buf, int blen) void qword_addhex(char **bpp, int *lp, char *buf, int blen)
{ {
@ -991,6 +1023,7 @@ void qword_addhex(char **bpp, int *lp, char *buf, int blen)
*bpp = bp; *bpp = bp;
*lp = len; *lp = len;
} }
EXPORT_SYMBOL(qword_addhex);
static void warn_no_listener(struct cache_detail *detail) static void warn_no_listener(struct cache_detail *detail)
{ {
@ -1113,6 +1146,7 @@ int qword_get(char **bpp, char *dest, int bufsize)
*dest = '\0'; *dest = '\0';
return len; return len;
} }
EXPORT_SYMBOL(qword_get);
/* /*
@ -1244,18 +1278,18 @@ static ssize_t read_flush(struct file *file, char __user *buf,
struct cache_detail *cd = PDE(file->f_path.dentry->d_inode)->data; struct cache_detail *cd = PDE(file->f_path.dentry->d_inode)->data;
char tbuf[20]; char tbuf[20];
unsigned long p = *ppos; unsigned long p = *ppos;
int len; size_t len;
sprintf(tbuf, "%lu\n", cd->flush_time); sprintf(tbuf, "%lu\n", cd->flush_time);
len = strlen(tbuf); len = strlen(tbuf);
if (p >= len) if (p >= len)
return 0; return 0;
len -= p; len -= p;
if (len > count) len = count; if (len > count)
len = count;
if (copy_to_user(buf, (void*)(tbuf+p), len)) if (copy_to_user(buf, (void*)(tbuf+p), len))
len = -EFAULT; return -EFAULT;
else *ppos += len;
*ppos += len;
return len; return len;
} }

View File

@ -33,7 +33,7 @@ struct proc_dir_entry *proc_net_rpc = NULL;
static int rpc_proc_show(struct seq_file *seq, void *v) { static int rpc_proc_show(struct seq_file *seq, void *v) {
const struct rpc_stat *statp = seq->private; const struct rpc_stat *statp = seq->private;
const struct rpc_program *prog = statp->program; const struct rpc_program *prog = statp->program;
int i, j; unsigned int i, j;
seq_printf(seq, seq_printf(seq,
"net %u %u %u %u\n", "net %u %u %u %u\n",
@ -81,7 +81,7 @@ void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) {
const struct svc_program *prog = statp->program; const struct svc_program *prog = statp->program;
const struct svc_procedure *proc; const struct svc_procedure *proc;
const struct svc_version *vers; const struct svc_version *vers;
int i, j; unsigned int i, j;
seq_printf(seq, seq_printf(seq,
"net %u %u %u %u\n", "net %u %u %u %u\n",
@ -106,6 +106,7 @@ void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) {
seq_putc(seq, '\n'); seq_putc(seq, '\n');
} }
} }
EXPORT_SYMBOL(svc_seq_show);
/** /**
* rpc_alloc_iostats - allocate an rpc_iostats structure * rpc_alloc_iostats - allocate an rpc_iostats structure
@ -255,12 +256,14 @@ svc_proc_register(struct svc_stat *statp, const struct file_operations *fops)
{ {
return do_register(statp->program->pg_name, statp, fops); return do_register(statp->program->pg_name, statp, fops);
} }
EXPORT_SYMBOL(svc_proc_register);
void void
svc_proc_unregister(const char *name) svc_proc_unregister(const char *name)
{ {
remove_proc_entry(name, proc_net_rpc); remove_proc_entry(name, proc_net_rpc);
} }
EXPORT_SYMBOL(svc_proc_unregister);
void void
rpc_proc_init(void) rpc_proc_init(void)

View File

@ -22,48 +22,6 @@
#include <linux/sunrpc/rpc_pipe_fs.h> #include <linux/sunrpc/rpc_pipe_fs.h>
#include <linux/sunrpc/xprtsock.h> #include <linux/sunrpc/xprtsock.h>
/* RPC server stuff */
EXPORT_SYMBOL(svc_create);
EXPORT_SYMBOL(svc_create_thread);
EXPORT_SYMBOL(svc_create_pooled);
EXPORT_SYMBOL(svc_set_num_threads);
EXPORT_SYMBOL(svc_exit_thread);
EXPORT_SYMBOL(svc_destroy);
EXPORT_SYMBOL(svc_drop);
EXPORT_SYMBOL(svc_process);
EXPORT_SYMBOL(svc_recv);
EXPORT_SYMBOL(svc_wake_up);
EXPORT_SYMBOL(svc_makesock);
EXPORT_SYMBOL(svc_reserve);
EXPORT_SYMBOL(svc_auth_register);
EXPORT_SYMBOL(auth_domain_lookup);
EXPORT_SYMBOL(svc_authenticate);
EXPORT_SYMBOL(svc_set_client);
/* RPC statistics */
#ifdef CONFIG_PROC_FS
EXPORT_SYMBOL(svc_proc_register);
EXPORT_SYMBOL(svc_proc_unregister);
EXPORT_SYMBOL(svc_seq_show);
#endif
/* caching... */
EXPORT_SYMBOL(auth_domain_find);
EXPORT_SYMBOL(auth_domain_put);
EXPORT_SYMBOL(auth_unix_add_addr);
EXPORT_SYMBOL(auth_unix_forget_old);
EXPORT_SYMBOL(auth_unix_lookup);
EXPORT_SYMBOL(cache_check);
EXPORT_SYMBOL(cache_flush);
EXPORT_SYMBOL(cache_purge);
EXPORT_SYMBOL(cache_register);
EXPORT_SYMBOL(cache_unregister);
EXPORT_SYMBOL(qword_add);
EXPORT_SYMBOL(qword_addhex);
EXPORT_SYMBOL(qword_get);
EXPORT_SYMBOL(svcauth_unix_purge);
EXPORT_SYMBOL(unix_domain_find);
extern struct cache_detail ip_map_cache, unix_gid_cache; extern struct cache_detail ip_map_cache, unix_gid_cache;
static int __init static int __init
@ -85,7 +43,8 @@ init_sunrpc(void)
#endif #endif
cache_register(&ip_map_cache); cache_register(&ip_map_cache);
cache_register(&unix_gid_cache); cache_register(&unix_gid_cache);
init_socket_xprt(); svc_init_xprt_sock(); /* svc sock transport */
init_socket_xprt(); /* clnt sock transport */
rpcauth_init_module(); rpcauth_init_module();
out: out:
return err; return err;
@ -96,12 +55,11 @@ cleanup_sunrpc(void)
{ {
rpcauth_remove_module(); rpcauth_remove_module();
cleanup_socket_xprt(); cleanup_socket_xprt();
svc_cleanup_xprt_sock();
unregister_rpc_pipefs(); unregister_rpc_pipefs();
rpc_destroy_mempool(); rpc_destroy_mempool();
if (cache_unregister(&ip_map_cache)) cache_unregister(&ip_map_cache);
printk(KERN_ERR "sunrpc: failed to unregister ip_map cache\n"); cache_unregister(&unix_gid_cache);
if (cache_unregister(&unix_gid_cache))
printk(KERN_ERR "sunrpc: failed to unregister unix_gid cache\n");
#ifdef RPC_DEBUG #ifdef RPC_DEBUG
rpc_unregister_sysctl(); rpc_unregister_sysctl();
#endif #endif

View File

@ -364,7 +364,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
void (*shutdown)(struct svc_serv *serv)) void (*shutdown)(struct svc_serv *serv))
{ {
struct svc_serv *serv; struct svc_serv *serv;
int vers; unsigned int vers;
unsigned int xdrsize; unsigned int xdrsize;
unsigned int i; unsigned int i;
@ -433,6 +433,7 @@ svc_create(struct svc_program *prog, unsigned int bufsize,
{ {
return __svc_create(prog, bufsize, /*npools*/1, shutdown); return __svc_create(prog, bufsize, /*npools*/1, shutdown);
} }
EXPORT_SYMBOL(svc_create);
struct svc_serv * struct svc_serv *
svc_create_pooled(struct svc_program *prog, unsigned int bufsize, svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
@ -452,6 +453,7 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
return serv; return serv;
} }
EXPORT_SYMBOL(svc_create_pooled);
/* /*
* Destroy an RPC service. Should be called with the BKL held * Destroy an RPC service. Should be called with the BKL held
@ -459,9 +461,6 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
void void
svc_destroy(struct svc_serv *serv) svc_destroy(struct svc_serv *serv)
{ {
struct svc_sock *svsk;
struct svc_sock *tmp;
dprintk("svc: svc_destroy(%s, %d)\n", dprintk("svc: svc_destroy(%s, %d)\n",
serv->sv_program->pg_name, serv->sv_program->pg_name,
serv->sv_nrthreads); serv->sv_nrthreads);
@ -476,14 +475,12 @@ svc_destroy(struct svc_serv *serv)
del_timer_sync(&serv->sv_temptimer); del_timer_sync(&serv->sv_temptimer);
list_for_each_entry_safe(svsk, tmp, &serv->sv_tempsocks, sk_list) svc_close_all(&serv->sv_tempsocks);
svc_force_close_socket(svsk);
if (serv->sv_shutdown) if (serv->sv_shutdown)
serv->sv_shutdown(serv); serv->sv_shutdown(serv);
list_for_each_entry_safe(svsk, tmp, &serv->sv_permsocks, sk_list) svc_close_all(&serv->sv_permsocks);
svc_force_close_socket(svsk);
BUG_ON(!list_empty(&serv->sv_permsocks)); BUG_ON(!list_empty(&serv->sv_permsocks));
BUG_ON(!list_empty(&serv->sv_tempsocks)); BUG_ON(!list_empty(&serv->sv_tempsocks));
@ -498,6 +495,7 @@ svc_destroy(struct svc_serv *serv)
kfree(serv->sv_pools); kfree(serv->sv_pools);
kfree(serv); kfree(serv);
} }
EXPORT_SYMBOL(svc_destroy);
/* /*
* Allocate an RPC server's buffer space. * Allocate an RPC server's buffer space.
@ -536,6 +534,44 @@ svc_release_buffer(struct svc_rqst *rqstp)
put_page(rqstp->rq_pages[i]); put_page(rqstp->rq_pages[i]);
} }
/*
 * Allocate and set up a struct svc_rqst for a new thread of @serv in @pool.
 *
 * The request is zero-allocated, counted in both serv->sv_nrthreads and
 * pool->sp_nrthreads, linked onto pool->sp_all_threads, and given argument
 * and result buffers of serv->sv_xdrsize bytes plus whatever
 * svc_init_buffer() sets up for serv->sv_max_mesg.
 *
 * Returns the new svc_rqst, or ERR_PTR(-ENOMEM) if any allocation fails.
 */
struct svc_rqst *
svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool)
{
struct svc_rqst *rqstp;
rqstp = kzalloc(sizeof(*rqstp), GFP_KERNEL);
if (!rqstp)
goto out_enomem;
init_waitqueue_head(&rqstp->rq_wait);
/*
 * Account for the thread and link it into the pool before the buffer
 * allocations below, so that every later failure can be unwound through
 * svc_exit_thread().
 * NOTE(review): presumably svc_exit_thread() drops these counts and frees
 * the partially initialised request -- confirm against its definition.
 */
serv->sv_nrthreads++;
spin_lock_bh(&pool->sp_lock);
pool->sp_nrthreads++;
list_add(&rqstp->rq_all, &pool->sp_all_threads);
spin_unlock_bh(&pool->sp_lock);
rqstp->rq_server = serv;
rqstp->rq_pool = pool;
rqstp->rq_argp = kmalloc(serv->sv_xdrsize, GFP_KERNEL);
if (!rqstp->rq_argp)
goto out_thread;
rqstp->rq_resp = kmalloc(serv->sv_xdrsize, GFP_KERNEL);
if (!rqstp->rq_resp)
goto out_thread;
if (!svc_init_buffer(rqstp, serv->sv_max_mesg))
goto out_thread;
return rqstp;
out_thread:
/* rqstp is already registered with serv/pool at this point. */
svc_exit_thread(rqstp);
out_enomem:
return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL(svc_prepare_thread);
/* /*
* Create a thread in the given pool. Caller must hold BKL. * Create a thread in the given pool. Caller must hold BKL.
* On a NUMA or SMP machine, with a multi-pool serv, the thread * On a NUMA or SMP machine, with a multi-pool serv, the thread
@ -550,24 +586,11 @@ __svc_create_thread(svc_thread_fn func, struct svc_serv *serv,
int have_oldmask = 0; int have_oldmask = 0;
cpumask_t oldmask; cpumask_t oldmask;
rqstp = kzalloc(sizeof(*rqstp), GFP_KERNEL); rqstp = svc_prepare_thread(serv, pool);
if (!rqstp) if (IS_ERR(rqstp)) {
error = PTR_ERR(rqstp);
goto out; goto out;
}
init_waitqueue_head(&rqstp->rq_wait);
if (!(rqstp->rq_argp = kmalloc(serv->sv_xdrsize, GFP_KERNEL))
|| !(rqstp->rq_resp = kmalloc(serv->sv_xdrsize, GFP_KERNEL))
|| !svc_init_buffer(rqstp, serv->sv_max_mesg))
goto out_thread;
serv->sv_nrthreads++;
spin_lock_bh(&pool->sp_lock);
pool->sp_nrthreads++;
list_add(&rqstp->rq_all, &pool->sp_all_threads);
spin_unlock_bh(&pool->sp_lock);
rqstp->rq_server = serv;
rqstp->rq_pool = pool;
if (serv->sv_nrpools > 1) if (serv->sv_nrpools > 1)
have_oldmask = svc_pool_map_set_cpumask(pool->sp_id, &oldmask); have_oldmask = svc_pool_map_set_cpumask(pool->sp_id, &oldmask);
@ -597,6 +620,7 @@ svc_create_thread(svc_thread_fn func, struct svc_serv *serv)
{ {
return __svc_create_thread(func, serv, &serv->sv_pools[0]); return __svc_create_thread(func, serv, &serv->sv_pools[0]);
} }
EXPORT_SYMBOL(svc_create_thread);
/* /*
* Choose a pool in which to create a new thread, for svc_set_num_threads * Choose a pool in which to create a new thread, for svc_set_num_threads
@ -700,6 +724,7 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
return error; return error;
} }
EXPORT_SYMBOL(svc_set_num_threads);
/* /*
* Called from a server thread as it's exiting. Caller must hold BKL. * Called from a server thread as it's exiting. Caller must hold BKL.
@ -726,6 +751,7 @@ svc_exit_thread(struct svc_rqst *rqstp)
if (serv) if (serv)
svc_destroy(serv); svc_destroy(serv);
} }
EXPORT_SYMBOL(svc_exit_thread);
/* /*
* Register an RPC service with the local portmapper. * Register an RPC service with the local portmapper.
@ -737,7 +763,8 @@ svc_register(struct svc_serv *serv, int proto, unsigned short port)
{ {
struct svc_program *progp; struct svc_program *progp;
unsigned long flags; unsigned long flags;
int i, error = 0, dummy; unsigned int i;
int error = 0, dummy;
if (!port) if (!port)
clear_thread_flag(TIF_SIGPENDING); clear_thread_flag(TIF_SIGPENDING);
@ -840,9 +867,9 @@ svc_process(struct svc_rqst *rqstp)
rqstp->rq_res.tail[0].iov_len = 0; rqstp->rq_res.tail[0].iov_len = 0;
/* Will be turned off only in gss privacy case: */ /* Will be turned off only in gss privacy case: */
rqstp->rq_splice_ok = 1; rqstp->rq_splice_ok = 1;
/* tcp needs a space for the record length... */
if (rqstp->rq_prot == IPPROTO_TCP) /* Setup reply header */
svc_putnl(resv, 0); rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp);
rqstp->rq_xid = svc_getu32(argv); rqstp->rq_xid = svc_getu32(argv);
svc_putu32(resv, rqstp->rq_xid); svc_putu32(resv, rqstp->rq_xid);
@ -1049,16 +1076,15 @@ err_bad:
svc_putnl(resv, ntohl(rpc_stat)); svc_putnl(resv, ntohl(rpc_stat));
goto sendit; goto sendit;
} }
EXPORT_SYMBOL(svc_process);
/* /*
* Return (transport-specific) limit on the rpc payload. * Return (transport-specific) limit on the rpc payload.
*/ */
u32 svc_max_payload(const struct svc_rqst *rqstp) u32 svc_max_payload(const struct svc_rqst *rqstp)
{ {
int max = RPCSVC_MAXPAYLOAD_TCP; u32 max = rqstp->rq_xprt->xpt_class->xcl_max_payload;
if (rqstp->rq_sock->sk_sock->type == SOCK_DGRAM)
max = RPCSVC_MAXPAYLOAD_UDP;
if (rqstp->rq_server->sv_max_payload < max) if (rqstp->rq_server->sv_max_payload < max)
max = rqstp->rq_server->sv_max_payload; max = rqstp->rq_server->sv_max_payload;
return max; return max;

1055
net/sunrpc/svc_xprt.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -57,11 +57,13 @@ svc_authenticate(struct svc_rqst *rqstp, __be32 *authp)
rqstp->rq_authop = aops; rqstp->rq_authop = aops;
return aops->accept(rqstp, authp); return aops->accept(rqstp, authp);
} }
EXPORT_SYMBOL(svc_authenticate);
int svc_set_client(struct svc_rqst *rqstp) int svc_set_client(struct svc_rqst *rqstp)
{ {
return rqstp->rq_authop->set_client(rqstp); return rqstp->rq_authop->set_client(rqstp);
} }
EXPORT_SYMBOL(svc_set_client);
/* A request, which was authenticated, has now executed. /* A request, which was authenticated, has now executed.
* Time to finalise the credentials and verifier * Time to finalise the credentials and verifier
@ -93,6 +95,7 @@ svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops)
spin_unlock(&authtab_lock); spin_unlock(&authtab_lock);
return rv; return rv;
} }
EXPORT_SYMBOL(svc_auth_register);
void void
svc_auth_unregister(rpc_authflavor_t flavor) svc_auth_unregister(rpc_authflavor_t flavor)
@ -129,6 +132,7 @@ void auth_domain_put(struct auth_domain *dom)
spin_unlock(&auth_domain_lock); spin_unlock(&auth_domain_lock);
} }
} }
EXPORT_SYMBOL(auth_domain_put);
struct auth_domain * struct auth_domain *
auth_domain_lookup(char *name, struct auth_domain *new) auth_domain_lookup(char *name, struct auth_domain *new)
@ -153,8 +157,10 @@ auth_domain_lookup(char *name, struct auth_domain *new)
spin_unlock(&auth_domain_lock); spin_unlock(&auth_domain_lock);
return new; return new;
} }
EXPORT_SYMBOL(auth_domain_lookup);
struct auth_domain *auth_domain_find(char *name) struct auth_domain *auth_domain_find(char *name)
{ {
return auth_domain_lookup(name, NULL); return auth_domain_lookup(name, NULL);
} }
EXPORT_SYMBOL(auth_domain_find);

View File

@ -63,6 +63,7 @@ struct auth_domain *unix_domain_find(char *name)
rv = auth_domain_lookup(name, &new->h); rv = auth_domain_lookup(name, &new->h);
} }
} }
EXPORT_SYMBOL(unix_domain_find);
static void svcauth_unix_domain_release(struct auth_domain *dom) static void svcauth_unix_domain_release(struct auth_domain *dom)
{ {
@ -340,6 +341,7 @@ int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom)
else else
return -ENOMEM; return -ENOMEM;
} }
EXPORT_SYMBOL(auth_unix_add_addr);
int auth_unix_forget_old(struct auth_domain *dom) int auth_unix_forget_old(struct auth_domain *dom)
{ {
@ -351,6 +353,7 @@ int auth_unix_forget_old(struct auth_domain *dom)
udom->addr_changes++; udom->addr_changes++;
return 0; return 0;
} }
EXPORT_SYMBOL(auth_unix_forget_old);
struct auth_domain *auth_unix_lookup(struct in_addr addr) struct auth_domain *auth_unix_lookup(struct in_addr addr)
{ {
@ -375,50 +378,56 @@ struct auth_domain *auth_unix_lookup(struct in_addr addr)
cache_put(&ipm->h, &ip_map_cache); cache_put(&ipm->h, &ip_map_cache);
return rv; return rv;
} }
EXPORT_SYMBOL(auth_unix_lookup);
void svcauth_unix_purge(void) void svcauth_unix_purge(void)
{ {
cache_purge(&ip_map_cache); cache_purge(&ip_map_cache);
} }
EXPORT_SYMBOL(svcauth_unix_purge);
static inline struct ip_map * static inline struct ip_map *
ip_map_cached_get(struct svc_rqst *rqstp) ip_map_cached_get(struct svc_rqst *rqstp)
{ {
struct ip_map *ipm; struct ip_map *ipm = NULL;
struct svc_sock *svsk = rqstp->rq_sock; struct svc_xprt *xprt = rqstp->rq_xprt;
spin_lock(&svsk->sk_lock);
ipm = svsk->sk_info_authunix; if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) {
if (ipm != NULL) { spin_lock(&xprt->xpt_lock);
if (!cache_valid(&ipm->h)) { ipm = xprt->xpt_auth_cache;
/* if (ipm != NULL) {
* The entry has been invalidated since it was if (!cache_valid(&ipm->h)) {
* remembered, e.g. by a second mount from the /*
* same IP address. * The entry has been invalidated since it was
*/ * remembered, e.g. by a second mount from the
svsk->sk_info_authunix = NULL; * same IP address.
spin_unlock(&svsk->sk_lock); */
cache_put(&ipm->h, &ip_map_cache); xprt->xpt_auth_cache = NULL;
return NULL; spin_unlock(&xprt->xpt_lock);
cache_put(&ipm->h, &ip_map_cache);
return NULL;
}
cache_get(&ipm->h);
} }
cache_get(&ipm->h); spin_unlock(&xprt->xpt_lock);
} }
spin_unlock(&svsk->sk_lock);
return ipm; return ipm;
} }
static inline void static inline void
ip_map_cached_put(struct svc_rqst *rqstp, struct ip_map *ipm) ip_map_cached_put(struct svc_rqst *rqstp, struct ip_map *ipm)
{ {
struct svc_sock *svsk = rqstp->rq_sock; struct svc_xprt *xprt = rqstp->rq_xprt;
spin_lock(&svsk->sk_lock); if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) {
if (svsk->sk_sock->type == SOCK_STREAM && spin_lock(&xprt->xpt_lock);
svsk->sk_info_authunix == NULL) { if (xprt->xpt_auth_cache == NULL) {
/* newly cached, keep the reference */ /* newly cached, keep the reference */
svsk->sk_info_authunix = ipm; xprt->xpt_auth_cache = ipm;
ipm = NULL; ipm = NULL;
}
spin_unlock(&xprt->xpt_lock);
} }
spin_unlock(&svsk->sk_lock);
if (ipm) if (ipm)
cache_put(&ipm->h, &ip_map_cache); cache_put(&ipm->h, &ip_map_cache);
} }

File diff suppressed because it is too large Load Diff

View File

@ -18,6 +18,7 @@
#include <linux/sunrpc/types.h> #include <linux/sunrpc/types.h>
#include <linux/sunrpc/sched.h> #include <linux/sunrpc/sched.h>
#include <linux/sunrpc/stats.h> #include <linux/sunrpc/stats.h>
#include <linux/sunrpc/svc_xprt.h>
/* /*
* Declare the debug flags here * Declare the debug flags here
@ -55,6 +56,30 @@ rpc_unregister_sysctl(void)
} }
} }
/*
 * sysctl handler for the read-only "transports" file.
 *
 * On a read at offset 0 the text produced by svc_print_xprts() is copied to
 * @buffer, truncated to the caller's buffer size. On return *lenp holds the
 * number of bytes actually produced and *ppos is advanced by the same amount.
 * A read at a non-zero offset, or any request with *lenp == 0, yields
 * end-of-file (*lenp = 0). Writes are rejected with -EINVAL.
 *
 * Returns 0 on success, -EINVAL on write, -EFAULT if @buffer is not
 * writable, or the negative value returned by svc_print_xprts().
 */
static int proc_do_xprt(ctl_table *table, int write, struct file *file,
			void __user *buffer, size_t *lenp, loff_t *ppos)
{
	char tmpbuf[256];
	int len;

	if ((*ppos && !write) || !*lenp) {
		*lenp = 0;
		return 0;
	}
	if (write)
		return -EINVAL;

	len = svc_print_xprts(tmpbuf, sizeof(tmpbuf));
	/*
	 * NOTE(review): assumes a negative return is a -errno value; it must
	 * never reach the size computations below as a huge unsigned length.
	 */
	if (len < 0)
		return len;

	/* Never copy more than tmpbuf holds or than the caller asked for. */
	if ((size_t)len > sizeof(tmpbuf))
		len = sizeof(tmpbuf);
	if ((size_t)len > *lenp)
		len = *lenp;

	if (!access_ok(VERIFY_WRITE, buffer, len))
		return -EFAULT;
	if (__copy_to_user(buffer, tmpbuf, len))
		return -EFAULT;

	/* Report bytes produced, not the unused remainder of the buffer. */
	*lenp = len;
	*ppos += len;
	return 0;
}
static int static int
proc_dodebug(ctl_table *table, int write, struct file *file, proc_dodebug(ctl_table *table, int write, struct file *file,
void __user *buffer, size_t *lenp, loff_t *ppos) void __user *buffer, size_t *lenp, loff_t *ppos)
@ -147,6 +172,12 @@ static ctl_table debug_table[] = {
.mode = 0644, .mode = 0644,
.proc_handler = &proc_dodebug .proc_handler = &proc_dodebug
}, },
{
.procname = "transports",
.maxlen = 256,
.mode = 0444,
.proc_handler = &proc_do_xprt,
},
{ .ctl_name = 0 } { .ctl_name = 0 }
}; };

View File

@ -96,11 +96,13 @@ xdr_encode_string(__be32 *p, const char *string)
EXPORT_SYMBOL(xdr_encode_string); EXPORT_SYMBOL(xdr_encode_string);
__be32 * __be32 *
xdr_decode_string_inplace(__be32 *p, char **sp, int *lenp, int maxlen) xdr_decode_string_inplace(__be32 *p, char **sp,
unsigned int *lenp, unsigned int maxlen)
{ {
unsigned int len; u32 len;
if ((len = ntohl(*p++)) > maxlen) len = ntohl(*p++);
if (len > maxlen)
return NULL; return NULL;
*lenp = len; *lenp = len;
*sp = (char *) p; *sp = (char *) p;

View File

@ -1,3 +1,8 @@
obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma.o obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma.o
xprtrdma-y := transport.o rpc_rdma.o verbs.o xprtrdma-y := transport.o rpc_rdma.o verbs.o
obj-$(CONFIG_SUNRPC_XPRT_RDMA) += svcrdma.o
svcrdma-y := svc_rdma.o svc_rdma_transport.o \
svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o

View File

@ -0,0 +1,266 @@
/*
* Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the BSD-type
* license below:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* Neither the name of the Network Appliance, Inc. nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Author: Tom Tucker <tom@opengridcomputing.com>
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/sysctl.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/svc_rdma.h>
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
/*
 * RPC/RDMA parameters.
 *
 * Each tunable is followed by the lower and upper bound that the sysctl
 * table below passes as .extra1/.extra2 for that tunable.
 */
/* Exposed as sysctl "max_outbound_read_requests". */
unsigned int svcrdma_ord = RPCRDMA_ORD;
static unsigned int min_ord = 1;
static unsigned int max_ord = 4096;
/* Exposed as sysctl "max_requests". */
unsigned int svcrdma_max_requests = RPCRDMA_MAX_REQUESTS;
static unsigned int min_max_requests = 4;
static unsigned int max_max_requests = 16384;
/* Exposed as sysctl "max_req_size"; its bounds are named *_max_inline. */
unsigned int svcrdma_max_req_size = RPCRDMA_MAX_REQ_SIZE;
static unsigned int min_max_inline = 4096;
static unsigned int max_max_inline = 65536;
/*
 * Statistics counters. Each one is exported through a sysctl entry of the
 * same name whose handler is read_reset_stat(): reading returns the value,
 * writing resets it to zero.
 */
atomic_t rdma_stat_recv;
atomic_t rdma_stat_read;
atomic_t rdma_stat_write;
atomic_t rdma_stat_sq_starve;
atomic_t rdma_stat_rq_starve;
atomic_t rdma_stat_rq_poll;
atomic_t rdma_stat_rq_prod;
atomic_t rdma_stat_sq_poll;
atomic_t rdma_stat_sq_prod;
/*
 * This function implements reading and resetting an atomic_t stat
 * variable through read/write to a proc file. Any write to the file
 * resets the associated statistic to zero. Any read returns its
 * current value.
 *
 * @table->data must point at the atomic_t to expose; -EINVAL is returned
 * if it is NULL. On a read, the value is formatted as "%d\n" and the part
 * of that text starting at *ppos is copied to @buffer, limited to *lenp
 * bytes. *lenp is then set to the number of bytes copied and *ppos is
 * advanced by the same amount. Returns -EFAULT if the copy to user space
 * fails, 0 otherwise.
 */
static int read_reset_stat(ctl_table *table, int write,
			   struct file *filp, void __user *buffer, size_t *lenp,
			   loff_t *ppos)
{
	atomic_t *stat = (atomic_t *)table->data;
	char str_buf[32];
	char *data;
	int len;

	if (!stat)
		return -EINVAL;

	if (write) {
		atomic_set(stat, 0);
		return 0;
	}

	/* snprintf() returns the untruncated length; reject truncation. */
	len = snprintf(str_buf, sizeof(str_buf), "%d\n", atomic_read(stat));
	if (len >= (int)sizeof(str_buf))
		return -EFAULT;

	/* Reading past the end of the text is end-of-file. */
	if (*ppos > len) {
		*lenp = 0;
		return 0;
	}

	data = &str_buf[*ppos];
	len -= *ppos;
	if (len > *lenp)
		len = *lenp;
	/* Copy from the offset position, not from the start of the text. */
	if (len && copy_to_user(buffer, data, len))
		return -EFAULT;

	*lenp = len;
	*ppos += len;
	return 0;
}
/*
 * Handle returned by register_sysctl_table() in svc_rdma_init(); NULL while
 * the sysctl tree is not registered. Released in svc_rdma_cleanup().
 */
static struct ctl_table_header *svcrdma_table_header;
/*
 * Leaf sysctl entries for the RDMA server transport.
 *
 * The first three entries are integer tunables handled by
 * proc_dointvec_minmax, with .extra1/.extra2 pointing at their lower and
 * upper bounds. The remaining entries expose the rdma_stat_* counters
 * through read_reset_stat(). The table ends with a zeroed sentinel entry.
 *
 * NOTE(review): the tunables and their bounds are declared unsigned int but
 * use the signed-int handler proc_dointvec_minmax -- confirm this is
 * intended; the configured maxima are all well below INT_MAX.
 */
static ctl_table svcrdma_parm_table[] = {
/* Tunable: svcrdma_max_requests, bounded by min/max_max_requests. */
{
.procname = "max_requests",
.data = &svcrdma_max_requests,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_minmax,
.strategy = &sysctl_intvec,
.extra1 = &min_max_requests,
.extra2 = &max_max_requests
},
/* Tunable: svcrdma_max_req_size, bounded by min/max_max_inline. */
{
.procname = "max_req_size",
.data = &svcrdma_max_req_size,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_minmax,
.strategy = &sysctl_intvec,
.extra1 = &min_max_inline,
.extra2 = &max_max_inline
},
/* Tunable: svcrdma_ord, bounded by min_ord/max_ord. */
{
.procname = "max_outbound_read_requests",
.data = &svcrdma_ord,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_minmax,
.strategy = &sysctl_intvec,
.extra1 = &min_ord,
.extra2 = &max_ord,
},
/* Statistics counters: read returns the value, write resets it to 0. */
{
.procname = "rdma_stat_read",
.data = &rdma_stat_read,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = &read_reset_stat,
},
{
.procname = "rdma_stat_recv",
.data = &rdma_stat_recv,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = &read_reset_stat,
},
{
.procname = "rdma_stat_write",
.data = &rdma_stat_write,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = &read_reset_stat,
},
{
.procname = "rdma_stat_sq_starve",
.data = &rdma_stat_sq_starve,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = &read_reset_stat,
},
{
.procname = "rdma_stat_rq_starve",
.data = &rdma_stat_rq_starve,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = &read_reset_stat,
},
{
.procname = "rdma_stat_rq_poll",
.data = &rdma_stat_rq_poll,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = &read_reset_stat,
},
{
.procname = "rdma_stat_rq_prod",
.data = &rdma_stat_rq_prod,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = &read_reset_stat,
},
{
.procname = "rdma_stat_sq_poll",
.data = &rdma_stat_sq_poll,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = &read_reset_stat,
},
{
.procname = "rdma_stat_sq_prod",
.data = &rdma_stat_sq_prod,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = &read_reset_stat,
},
/* Sentinel terminating the table. */
{
.ctl_name = 0,
},
};
/*
 * Intermediate "svc_rdma" directory (mode 0555) whose children are the
 * entries of svcrdma_parm_table.
 */
static ctl_table svcrdma_table[] = {
{
.procname = "svc_rdma",
.mode = 0555,
.child = svcrdma_parm_table
},
/* Sentinel terminating the table. */
{
.ctl_name = 0,
},
};
/*
 * Root of the tree passed to register_sysctl_table(): the "sunrpc"
 * directory (CTL_SUNRPC, mode 0555) containing the "svc_rdma" directory
 * defined by svcrdma_table.
 */
static ctl_table svcrdma_root_table[] = {
{
.ctl_name = CTL_SUNRPC,
.procname = "sunrpc",
.mode = 0555,
.child = svcrdma_table
},
/* Sentinel terminating the table. */
{
.ctl_name = 0,
},
};
void svc_rdma_cleanup(void)
{
dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n");
if (svcrdma_table_header) {
unregister_sysctl_table(svcrdma_table_header);
svcrdma_table_header = NULL;
}
svc_unreg_xprt_class(&svc_rdma_class);
}
int svc_rdma_init(void)
{
dprintk("SVCRDMA Module Init, register RPC RDMA transport\n");
dprintk("\tsvcrdma_ord : %d\n", svcrdma_ord);
dprintk("\tmax_requests : %d\n", svcrdma_max_requests);
dprintk("\tsq_depth : %d\n",
svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT);
dprintk("\tmax_inline : %d\n", svcrdma_max_req_size);
if (!svcrdma_table_header)
svcrdma_table_header =
register_sysctl_table(svcrdma_root_table);
/* Register RDMA with the SVC transport switch */
svc_reg_xprt_class(&svc_rdma_class);
return 0;
}
/* Module metadata, followed by the load/unload entry points defined above. */
MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
MODULE_DESCRIPTION("SVC RDMA Transport");
MODULE_LICENSE("Dual BSD/GPL");
module_init(svc_rdma_init);
module_exit(svc_rdma_cleanup);

View File

@ -0,0 +1,412 @@
/*
* Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the BSD-type
* license below:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* Neither the name of the Network Appliance, Inc. nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Author: Tom Tucker <tom@opengridcomputing.com>
*/
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/debug.h>
#include <asm/unaligned.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/sunrpc/svc_rdma.h>
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
/*
 * Decodes a read chunk list in place. The expected format is as follows:
 *    discrim  : xdr_one
 *    position : u32 offset into XDR stream
 *    handle   : u32 RKEY
 *    length   : u32 length of segment
 *    offset   : u64 remote va
 *    . . .
 *  end-of-list: xdr_zero
 *
 * @va:    first word of the read list
 * @vaend: first byte past the received data
 *
 * Every field of every chunk is converted to host byte order where it
 * sits. Returns a pointer to the word following the list terminator, or
 * NULL if the list does not fit in front of @vaend.
 */
static u32 *decode_read_list(u32 *va, u32 *vaend)
{
	struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va;

	for (;;) {
		u64 ch_offset;

		/*
		 * The list comes from the peer: make sure the discriminator
		 * is inside the received data before looking at it, so an
		 * unterminated list cannot walk us off the end of the buffer.
		 */
		if ((unsigned long)&ch->rc_discrim + sizeof(ch->rc_discrim) >
		    (unsigned long)vaend) {
			dprintk("svcrdma: vaend=%p, ch=%p\n", vaend, ch);
			return NULL;
		}
		if (ch->rc_discrim == xdr_zero)
			break;

		/* A non-terminator entry must be a complete chunk */
		if (((unsigned long)ch + sizeof(struct rpcrdma_read_chunk)) >
		    (unsigned long)vaend) {
			dprintk("svcrdma: vaend=%p, ch=%p\n", vaend, ch);
			return NULL;
		}

		ch->rc_discrim = ntohl(ch->rc_discrim);
		ch->rc_position = ntohl(ch->rc_position);
		ch->rc_target.rs_handle = ntohl(ch->rc_target.rs_handle);
		ch->rc_target.rs_length = ntohl(ch->rc_target.rs_length);

		/* The 64-bit offset may be unaligned; rewrite it carefully */
		va = (u32 *)&ch->rc_target.rs_offset;
		xdr_decode_hyper(va, &ch_offset);
		put_unaligned(ch_offset, (u64 *)va);
		ch++;
	}

	/* rc_position is the word right after the terminating discriminator */
	return (u32 *)&ch->rc_position;
}
/*
 * Walk an already-decoded read chunk list and report how many chunks it
 * holds and how many bytes they describe in total. The chunk list has
 * already been verified to fit within the RPCRDMA header.
 *
 * @ch:         first chunk of the list (terminated by a zero discriminator)
 * @ch_count:   out, number of chunks
 * @byte_count: out, sum of rs_length over all chunks
 */
void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *ch,
			       int *ch_count, int *byte_count)
{
	*byte_count = 0;
	*ch_count = 0;

	while (ch->rc_discrim != 0) {
		*byte_count += ch->rc_target.rs_length;
		(*ch_count)++;
		ch++;
	}
}
/*
 * Decodes a write chunk list in place. The expected format is as follows:
 *    discrim  : xdr_one
 *    nchunks  : <count>
 *       handle   : u32 RKEY              ---+
 *       length   : u32 <len of segment>     |
 *       offset   : remote va                + <count>
 *       . . .                               |
 *                                        ---+
 *
 * @va:    first word of the write list
 * @vaend: first byte past the received data
 *
 * Returns a pointer to the word following the write list (and its
 * terminator), or NULL if the list does not fit in front of @vaend.
 */
static u32 *decode_write_list(u32 *va, u32 *vaend)
{
	int ch_no;
	unsigned long max_chunks;
	struct rpcrdma_write_array *ary =
		(struct rpcrdma_write_array *)va;

	/* The discriminator must be inside the received data to be read */
	if ((unsigned long)&ary->wc_discrim + sizeof(ary->wc_discrim) >
	    (unsigned long)vaend) {
		dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
		return NULL;
	}

	/* Check for not write-array */
	if (ary->wc_discrim == xdr_zero)
		return (u32 *)&ary->wc_nchunks;

	if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
	    (unsigned long)vaend) {
		dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
		return NULL;
	}
	ary->wc_discrim = ntohl(ary->wc_discrim);
	ary->wc_nchunks = ntohl(ary->wc_nchunks);

	/*
	 * wc_nchunks is supplied by the peer. Compare it against the number
	 * of chunks that can fit rather than computing base + size * count,
	 * which can wrap around and slip past the check. The header check
	 * above guarantees wc_array[0] is not beyond vaend.
	 */
	max_chunks = ((unsigned long)vaend -
		      (unsigned long)&ary->wc_array[0]) /
		     sizeof(struct rpcrdma_write_chunk);
	if (ary->wc_nchunks > max_chunks) {
		dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
			ary, ary->wc_nchunks, vaend);
		return NULL;
	}

	for (ch_no = 0; ch_no < ary->wc_nchunks; ch_no++) {
		u64 ch_offset;

		ary->wc_array[ch_no].wc_target.rs_handle =
			ntohl(ary->wc_array[ch_no].wc_target.rs_handle);
		ary->wc_array[ch_no].wc_target.rs_length =
			ntohl(ary->wc_array[ch_no].wc_target.rs_length);
		/* The 64-bit offset may be unaligned; rewrite it carefully */
		va = (u32 *)&ary->wc_array[ch_no].wc_target.rs_offset;
		xdr_decode_hyper(va, &ch_offset);
		put_unaligned(ch_offset, (u64 *)va);
	}

	/*
	 * rs_length is the 2nd 4B field in wc_target and taking its
	 * address skips the list terminator
	 */
	return (u32 *)&ary->wc_array[ch_no].wc_target.rs_length;
}
/*
 * Decodes a reply chunk array in place. The layout is a discriminator,
 * a chunk count and that many { handle, length, offset } segments; unlike
 * the write list there is no trailing terminator word.
 *
 * @va:    first word of the reply array
 * @vaend: first byte past the received data
 *
 * Returns a pointer to the word following the reply array, or NULL if the
 * array does not fit in front of @vaend.
 */
static u32 *decode_reply_array(u32 *va, u32 *vaend)
{
	int ch_no;
	unsigned long max_chunks;
	struct rpcrdma_write_array *ary =
		(struct rpcrdma_write_array *)va;

	/* The discriminator must be inside the received data to be read */
	if ((unsigned long)&ary->wc_discrim + sizeof(ary->wc_discrim) >
	    (unsigned long)vaend) {
		dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
		return NULL;
	}

	/* Check for no reply-array */
	if (ary->wc_discrim == xdr_zero)
		return (u32 *)&ary->wc_nchunks;

	if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
	    (unsigned long)vaend) {
		dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
		return NULL;
	}
	ary->wc_discrim = ntohl(ary->wc_discrim);
	ary->wc_nchunks = ntohl(ary->wc_nchunks);

	/*
	 * wc_nchunks is supplied by the peer. Compare it against the number
	 * of chunks that can fit rather than computing base + size * count,
	 * which can wrap around and slip past the check. The header check
	 * above guarantees wc_array[0] is not beyond vaend.
	 */
	max_chunks = ((unsigned long)vaend -
		      (unsigned long)&ary->wc_array[0]) /
		     sizeof(struct rpcrdma_write_chunk);
	if (ary->wc_nchunks > max_chunks) {
		dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
			ary, ary->wc_nchunks, vaend);
		return NULL;
	}

	for (ch_no = 0; ch_no < ary->wc_nchunks; ch_no++) {
		u64 ch_offset;

		ary->wc_array[ch_no].wc_target.rs_handle =
			ntohl(ary->wc_array[ch_no].wc_target.rs_handle);
		ary->wc_array[ch_no].wc_target.rs_length =
			ntohl(ary->wc_array[ch_no].wc_target.rs_length);
		/* The 64-bit offset may be unaligned; rewrite it carefully */
		va = (u32 *)&ary->wc_array[ch_no].wc_target.rs_offset;
		xdr_decode_hyper(va, &ch_offset);
		put_unaligned(ch_offset, (u64 *)va);
	}

	return (u32 *)&ary->wc_array[ch_no];
}
/*
 * Decode, in place, the RPC/RDMA header found at the start of
 * rqstp->rq_arg.head[0] and advance head[0] past it so that it points at
 * the RPC message.
 *
 * @rdma_req: out, set to the RPC/RDMA header. It is stored on every
 *            return path, including errors, so that a caller can still
 *            use the header (e.g. to build an error reply).
 * @rqstp:    request whose rq_arg holds the received data
 *
 * Returns the header length in bytes on success, -EINVAL if the message
 * is too short or a chunk list is malformed, or -ENOSYS if the RPC/RDMA
 * version is not RPCRDMA_VERSION.
 */
int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
			    struct svc_rqst *rqstp)
{
	struct rpcrdma_msg *rmsgp;
	u32 *va;
	u32 *vaend;
	u32 hdr_len;

	rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;

	/*
	 * Publish the header pointer up front: the caller dereferences it
	 * after -ENOSYS and after the RDMA_MSGP return below, and would
	 * otherwise be using an uninitialized pointer.
	 */
	*rdma_req = rmsgp;

	/* Verify that there's enough bytes for header + something */
	if (rqstp->rq_arg.len <= RPCRDMA_HDRLEN_MIN) {
		dprintk("svcrdma: header too short = %d\n",
			rqstp->rq_arg.len);
		return -EINVAL;
	}

	/* Decode the header */
	rmsgp->rm_xid = ntohl(rmsgp->rm_xid);
	rmsgp->rm_vers = ntohl(rmsgp->rm_vers);
	rmsgp->rm_credit = ntohl(rmsgp->rm_credit);
	rmsgp->rm_type = ntohl(rmsgp->rm_type);

	if (rmsgp->rm_vers != RPCRDMA_VERSION)
		return -ENOSYS;

	/* Pull in the extra for the padded case and bump our pointer */
	if (rmsgp->rm_type == RDMA_MSGP) {
		int hdrlen;

		rmsgp->rm_body.rm_padded.rm_align =
			ntohl(rmsgp->rm_body.rm_padded.rm_align);
		rmsgp->rm_body.rm_padded.rm_thresh =
			ntohl(rmsgp->rm_body.rm_padded.rm_thresh);

		va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
		hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp);

		/* Validate before head[0] is modified, not after */
		if (hdrlen > rqstp->rq_arg.len)
			return -EINVAL;

		rqstp->rq_arg.head[0].iov_base = va;
		rqstp->rq_arg.head[0].iov_len -= hdrlen;
		return hdrlen;
	}

	/* The chunk list may contain either a read chunk list or a write
	 * chunk list and a reply chunk list.
	 */
	va = &rmsgp->rm_body.rm_chunks[0];
	vaend = (u32 *)((unsigned long)rmsgp + rqstp->rq_arg.len);
	va = decode_read_list(va, vaend);
	if (!va)
		return -EINVAL;
	va = decode_write_list(va, vaend);
	if (!va)
		return -EINVAL;
	va = decode_reply_array(va, vaend);
	if (!va)
		return -EINVAL;

	/* va now points at the first byte of the RPC message */
	rqstp->rq_arg.head[0].iov_base = va;
	hdr_len = (unsigned long)va - (unsigned long)rmsgp;
	rqstp->rq_arg.head[0].iov_len -= hdr_len;

	return hdr_len;
}
/*
 * Re-locate the RPC message inside a request whose RPC/RDMA header was
 * already decoded (fields are in host byte order), and advance
 * rqstp->rq_arg.head[0] past the header.
 *
 * Returns the length of the RPC/RDMA header in bytes.
 */
int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *rqstp)
{
	struct rpcrdma_msg *rmsgp;
	struct rpcrdma_read_chunk *ch;
	struct rpcrdma_write_array *ary;
	u32 *va;
	u32 hdrlen;

	dprintk("svcrdma: processing deferred RDMA header on rqstp=%p\n",
		rqstp);
	rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;

	if (rmsgp->rm_type == RDMA_MSGP) {
		/* Pull in the extra for the padded case */
		va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
	} else {
		/*
		 * Skip all chunks to find RPC msg. These were previously
		 * processed
		 */

		/* Skip read-list: stop on the terminating discriminator */
		ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
		while (ch->rc_discrim != xdr_zero)
			ch++;
		va = (u32 *)&ch->rc_position;

		/* Skip write-list */
		ary = (struct rpcrdma_write_array *)va;
		if (ary->wc_discrim != xdr_zero) {
			/*
			 * rs_length is the 2nd 4B field in wc_target and
			 * taking its address skips the list terminator
			 */
			va = (u32 *)&ary->wc_array[ary->wc_nchunks].
				wc_target.rs_length;
		} else {
			va = (u32 *)&ary->wc_nchunks;
		}

		/* Skip reply-array */
		ary = (struct rpcrdma_write_array *)va;
		if (ary->wc_discrim != xdr_zero)
			va = (u32 *)&ary->wc_array[ary->wc_nchunks];
		else
			va = (u32 *)&ary->wc_nchunks;
	}

	/* Bump head[0] so that it starts at the RPC message */
	rqstp->rq_arg.head[0].iov_base = va;
	hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp);
	rqstp->rq_arg.head[0].iov_len -= hdrlen;

	return hdrlen;
}
/*
 * Encode an RDMA_ERROR reply at @va.
 *
 * @xprt:  transport; its sc_max_requests is sent as the credit value
 * @rmsgp: decoded request header supplying xid and version
 * @err:   error code to report; ERR_VERS additionally appends two
 *         RPCRDMA_VERSION words
 * @va:    destination buffer
 *
 * Returns the number of bytes written.
 */
int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
			      struct rpcrdma_msg *rmsgp,
			      enum rpcrdma_errcode err, u32 *va)
{
	u32 *p = va;

	*p++ = htonl(rmsgp->rm_xid);
	*p++ = htonl(rmsgp->rm_vers);
	*p++ = htonl(xprt->sc_max_requests);
	*p++ = htonl(RDMA_ERROR);
	*p++ = htonl(err);

	if (err == ERR_VERS) {
		*p++ = htonl(RPCRDMA_VERSION);
		*p++ = htonl(RPCRDMA_VERSION);
	}

	return (int)((unsigned long)p - (unsigned long)va);
}
/*
 * Compute the length in bytes of an encoded (network byte order) reply
 * header by stepping over its write list and reply array. A reply has no
 * read list, so the write list starts at rm_chunks[1].
 */
int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp)
{
	struct rpcrdma_write_array *ary;
	void *pos;

	/* Step over the write list */
	ary = (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[1];
	if (ary->wc_discrim)
		/* rs_length of the slot past the chunks: skips terminator */
		pos = &ary->wc_array[ntohl(ary->wc_nchunks)].
			wc_target.rs_length;
	else
		pos = &ary->wc_nchunks;

	/* Step over the reply array */
	ary = (struct rpcrdma_write_array *)pos;
	if (ary->wc_discrim)
		pos = &ary->wc_array[ntohl(ary->wc_nchunks)];
	else
		pos = &ary->wc_nchunks;

	return (unsigned long)pos - (unsigned long)rmsgp;
}
void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks)
{
struct rpcrdma_write_array *ary;
/* no read-list */
rmsgp->rm_body.rm_chunks[0] = xdr_zero;
/* write-array discrim */
ary = (struct rpcrdma_write_array *)
&rmsgp->rm_body.rm_chunks[1];
ary->wc_discrim = xdr_one;
ary->wc_nchunks = htonl(chunks);
/* write-list terminator */
ary->wc_array[chunks].wc_target.rs_handle = xdr_zero;
/* reply-array discriminator */
ary->wc_array[chunks].wc_target.rs_length = xdr_zero;
}
/*
 * Mark @ary as a present reply array holding @chunks entries. The entries
 * themselves are not written here.
 */
void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *ary,
				     int chunks)
{
	ary->wc_nchunks = htonl(chunks);
	ary->wc_discrim = xdr_one;
}
/*
 * Encode entry @chunk_no of @ary in network byte order.
 *
 * @rs_handle: handle (RKEY) of the segment
 * @rs_offset: 64-bit offset of the segment
 * @write_len: length stored in the segment's rs_length field
 */
void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary,
				     int chunk_no,
				     u32 rs_handle, u64 rs_offset,
				     u32 write_len)
{
	ary->wc_array[chunk_no].wc_target.rs_handle = htonl(rs_handle);
	ary->wc_array[chunk_no].wc_target.rs_length = htonl(write_len);
	xdr_encode_hyper((u32 *)&ary->wc_array[chunk_no].wc_target.rs_offset,
			 rs_offset);
}
/*
 * Fill in the fixed part of a reply header.
 *
 * xid and version are taken from the (host byte order) request header
 * @rdma_argp, the credit value from @xprt->sc_max_requests, and the
 * message type from @rdma_type. All three chunk lists are encoded as
 * empty.
 */
void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt,
				      struct rpcrdma_msg *rdma_argp,
				      struct rpcrdma_msg *rdma_resp,
				      enum rpcrdma_proc rdma_type)
{
	int i;

	rdma_resp->rm_xid = htonl(rdma_argp->rm_xid);
	rdma_resp->rm_vers = htonl(rdma_argp->rm_vers);
	rdma_resp->rm_credit = htonl(xprt->sc_max_requests);
	rdma_resp->rm_type = htonl(rdma_type);

	/* Encode <nul> chunks lists */
	for (i = 0; i < 3; i++)
		rdma_resp->rm_body.rm_chunks[i] = xdr_zero;
}

View File

@ -0,0 +1,586 @@
/*
* Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the BSD-type
* license below:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* Neither the name of the Network Appliance, Inc. nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Author: Tom Tucker <tom@opengridcomputing.com>
*/
#include <linux/sunrpc/debug.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/spinlock.h>
#include <asm/unaligned.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <linux/sunrpc/svc_rdma.h>
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
/*
* Replace the pages in the rq_pages array with the pages from the SGE in
* the RDMA_RECV completion. The SGL should contain full pages up until the
* last one.
*
* rqstp:      request whose rq_pages / rq_arg are rebuilt
* ctxt:       receive context supplying pages[] and sge[]; on return its
*             count is lowered to the number of pages handed to rqstp
* byte_count: number of bytes received
*
* The first page becomes rq_arg.head[0]; any remaining bytes are described
* by the page list starting at rq_pages[1]. rq_respages is set to the slot
* after the last page taken from the context.
*/
static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *ctxt,
u32 byte_count)
{
struct page *page;
u32 bc;
int sge_no;
/* Swap the page in the SGE with the page in argpages */
page = ctxt->pages[0];
put_page(rqstp->rq_pages[0]);
rqstp->rq_pages[0] = page;
/* Set up the XDR head */
rqstp->rq_arg.head[0].iov_base = page_address(page);
rqstp->rq_arg.head[0].iov_len = min(byte_count, ctxt->sge[0].length);
rqstp->rq_arg.len = byte_count;
rqstp->rq_arg.buflen = byte_count;
/* Compute bytes past head in the SGL */
bc = byte_count - rqstp->rq_arg.head[0].iov_len;
/* If data remains, store it in the pagelist */
rqstp->rq_arg.page_len = bc;
rqstp->rq_arg.page_base = 0;
rqstp->rq_arg.pages = &rqstp->rq_pages[1];
sge_no = 1;
/* Take one context page per SGE until all received bytes are covered */
while (bc && sge_no < ctxt->count) {
page = ctxt->pages[sge_no];
put_page(rqstp->rq_pages[sge_no]);
rqstp->rq_pages[sge_no] = page;
bc -= min(bc, ctxt->sge[sge_no].length);
rqstp->rq_arg.buflen += ctxt->sge[sge_no].length;
sge_no++;
}
rqstp->rq_respages = &rqstp->rq_pages[sge_no];
/* We should never run out of SGE because the limit is defined to
* support the max allowed RPC data length
*/
BUG_ON(bc && (sge_no == ctxt->count));
BUG_ON((rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len)
!= byte_count);
BUG_ON(rqstp->rq_arg.len != byte_count);
/* If not all pages were used from the SGL, free the remaining ones */
/* bc is reused here to remember how many pages were consumed */
bc = sge_no;
while (sge_no < ctxt->count) {
page = ctxt->pages[sge_no++];
put_page(page);
}
ctxt->count = bc;
/* Set up tail */
rqstp->rq_arg.tail[0].iov_base = NULL;
rqstp->rq_arg.tail[0].iov_len = 0;
}
/*
* Per-read-chunk slice of the SGE array built by rdma_rcl_to_sge(): which
* SGE the chunk starts at and how many SGEs it spans.
*/
struct chunk_sge {
int start; /* sge no for this chunk */
int count; /* sge count for this chunk */
};
/* Encode a read-chunk-list as an array of IB SGE
*
* Assumptions:
* - chunk[0]->position points to pages[0] at an offset of 0
* - pages[] is not physically or virtually contiguous and consists of
* PAGE_SIZE elements.
*
* Output:
* - sge array pointing into pages[] array.
* - chunk_sge array specifying sge index and count for each
* chunk in the read list
*
* Parameters:
* - xprt: transport whose device and lkey are used for the DMA mappings
* - rqstp: request supplying rq_arg (head, tail and page list)
* - head: context that receives a copy of the arg state (head->arg) and
* the list of pages that will hold the read data
* - rmsgp: decoded RPC/RDMA header; the read list starts at rm_chunks[0]
* - sge / ch_sge_ary: output arrays, see above
* - ch_count: not referenced in the body
* - byte_count: total number of bytes described by the read list
*
* Returns the number of SGEs filled in.
*/
static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *head,
struct rpcrdma_msg *rmsgp,
struct ib_sge *sge,
struct chunk_sge *ch_sge_ary,
int ch_count,
int byte_count)
{
int sge_no;
int sge_bytes;
int page_off;
int page_no;
int ch_bytes;
int ch_no;
struct rpcrdma_read_chunk *ch;
sge_no = 0;
page_no = 0;
page_off = 0;
ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
ch_no = 0;
ch_bytes = ch->rc_target.rs_length;
/* Snapshot the arg state in head, extended by the first chunk's bytes */
head->arg.head[0] = rqstp->rq_arg.head[0];
head->arg.tail[0] = rqstp->rq_arg.tail[0];
head->arg.pages = &head->pages[head->count];
/* sge[0].length is borrowed as storage; rdma_read_complete reads it back */
head->sge[0].length = head->count; /* save count of hdr pages */
head->arg.page_base = 0;
head->arg.page_len = ch_bytes;
head->arg.len = rqstp->rq_arg.len + ch_bytes;
head->arg.buflen = rqstp->rq_arg.buflen + ch_bytes;
head->count++;
ch_sge_ary[0].start = 0;
/* One SGE per iteration; an SGE never crosses a page or chunk boundary */
while (byte_count) {
sge_bytes = min_t(int, PAGE_SIZE-page_off, ch_bytes);
sge[sge_no].addr =
ib_dma_map_page(xprt->sc_cm_id->device,
rqstp->rq_arg.pages[page_no],
page_off, sge_bytes,
DMA_FROM_DEVICE);
sge[sge_no].length = sge_bytes;
sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
/*
* Don't bump head->count here because the same page
* may be used by multiple SGE.
*/
head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no];
rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1];
byte_count -= sge_bytes;
ch_bytes -= sge_bytes;
sge_no++;
/*
* If all bytes for this chunk have been mapped to an
* SGE, move to the next SGE
*/
if (ch_bytes == 0) {
ch_sge_ary[ch_no].count =
sge_no - ch_sge_ary[ch_no].start;
ch_no++;
ch++;
/*
* NOTE(review): after the last chunk this writes
* ch_sge_ary[ch_no] and reads rs_length from the entry
* following the last chunk -- confirm both are in bounds.
*/
ch_sge_ary[ch_no].start = sge_no;
ch_bytes = ch->rc_target.rs_length;
/* If bytes remaining account for next chunk */
if (byte_count) {
head->arg.page_len += ch_bytes;
head->arg.len += ch_bytes;
head->arg.buflen += ch_bytes;
}
}
/*
* If this SGE consumed all of the page, move to the
* next page
*/
if ((sge_bytes + page_off) == PAGE_SIZE) {
page_no++;
page_off = 0;
/*
* If there are still bytes left to map, bump
* the page count
*/
if (byte_count)
head->count++;
} else
page_off += sge_bytes;
}
BUG_ON(byte_count != 0);
return sge_no;
}
/*
 * Record in @ctxt the address and length of the first @count entries of
 * @sge, set ctxt->count to @count, and advance *@sgl_offset by the total
 * length of those entries. The lkey of each entry is not copied.
 */
static void rdma_set_ctxt_sge(struct svc_rdma_op_ctxt *ctxt,
			      struct ib_sge *sge,
			      u64 *sgl_offset,
			      int count)
{
	int n = 0;

	ctxt->count = count;

	while (n < count) {
		ctxt->sge[n].addr = sge[n].addr;
		ctxt->sge[n].length = sge[n].length;
		*sgl_offset += sge[n].length;
		n++;
	}
}
/*
* Return how many SGEs a single RDMA_READ work request may carry when
* sge_count SGEs are still outstanding: at most xprt->sc_max_sge, and,
* inside the conditional block, only 1 when the device's transport is
* iWARP.
*
* NOTE(review): the iWARP branch is compiled only if RDMA_TRANSPORT_IWARP
* is a preprocessor macro. If it is an enumerator, the #ifdef is never
* true and the limit is silently dropped -- confirm against the header
* that declares it.
*/
static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
{
#ifdef RDMA_TRANSPORT_IWARP
if ((RDMA_TRANSPORT_IWARP ==
rdma_node_get_transport(xprt->sc_cm_id->
device->node_type))
&& sge_count > 1)
return 1;
else
#endif
return min_t(int, sge_count, xprt->sc_max_sge);
}
/*
* Use RDMA_READ to read data from the advertised client buffer into the
* XDR stream starting at rq_arg.head[0].iov_base.
* Each chunk in the array
* contains the following fields:
* discrim - '1', This isn't used for data placement
* position - The xdr stream offset (the same for every chunk)
* handle - RMR for client memory region
* length - data transfer length
* offset - 64 bit tagged offset in remote memory region
*
* On our side, we need to read into a pagelist. The first page immediately
* follows the RPC header.
*
* This function returns 1 to indicate success. The data is not yet in
* the pagelist and therefore the RPC request must be deferred. The
* I/O completion will enqueue the transport again and
* svc_rdma_recvfrom will complete the request.
*
* It returns 0 when the message has no read list. It also returns 0 when
* posting a read fails; in that case XPT_CLOSE has been set on the
* transport and the read contexts have been released.
* NOTE(review): the caller cannot tell these two zero returns apart --
* confirm that continuing as "no read list" after a failure is intended.
*
* NOTE: The ctxt must not be touched after the last WR has been posted
* because the I/O completion processing may occur on another
* processor and free / modify the context. Ne touche pas!
*/
static int rdma_read_xdr(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rmsgp,
struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *hdr_ctxt)
{
struct ib_send_wr read_wr;
int err = 0;
int ch_no;
struct ib_sge *sge;
int ch_count;
int byte_count;
int sge_count;
u64 sgl_offset;
struct rpcrdma_read_chunk *ch;
struct svc_rdma_op_ctxt *ctxt = NULL;
struct svc_rdma_op_ctxt *head;
struct svc_rdma_op_ctxt *tmp_sge_ctxt;
struct svc_rdma_op_ctxt *tmp_ch_ctxt;
struct chunk_sge *ch_sge_ary;
/* If no read list is present, return 0 */
ch = svc_rdma_get_read_chunk(rmsgp);
if (!ch)
return 0;
/* Allocate temporary contexts to keep SGE */
/* The sge[] storage of a second context is reused as a chunk_sge array */
BUG_ON(sizeof(struct ib_sge) < sizeof(struct chunk_sge));
tmp_sge_ctxt = svc_rdma_get_context(xprt);
sge = tmp_sge_ctxt->sge;
tmp_ch_ctxt = svc_rdma_get_context(xprt);
ch_sge_ary = (struct chunk_sge *)tmp_ch_ctxt->sge;
svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);
/* sge_count is assigned here but not read again in this function */
sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp,
sge, ch_sge_ary,
ch_count, byte_count);
head = svc_rdma_get_context(xprt);
sgl_offset = 0;
ch_no = 0;
for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
ch->rc_discrim != 0; ch++, ch_no++) {
/* Re-entered via goto when a chunk needs more than one work request */
next_sge:
/* First pass uses head; every later pass chains a fresh context */
if (!ctxt)
ctxt = head;
else {
ctxt->next = svc_rdma_get_context(xprt);
ctxt = ctxt->next;
}
ctxt->next = NULL;
ctxt->direction = DMA_FROM_DEVICE;
clear_bit(RDMACTXT_F_READ_DONE, &ctxt->flags);
clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
if ((ch+1)->rc_discrim == 0) {
/*
* Checked in sq_cq_reap to see if we need to
* be enqueued
*/
set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
/* Close the ring: this ctxt -> hdr_ctxt -> head -> ... */
ctxt->next = hdr_ctxt;
hdr_ctxt->next = head;
}
/* Prepare READ WR */
memset(&read_wr, 0, sizeof read_wr);
ctxt->wr_op = IB_WR_RDMA_READ;
read_wr.wr_id = (unsigned long)ctxt;
read_wr.opcode = IB_WR_RDMA_READ;
read_wr.send_flags = IB_SEND_SIGNALED;
read_wr.wr.rdma.rkey = ch->rc_target.rs_handle;
/* sgl_offset is how far into this chunk earlier work requests got */
read_wr.wr.rdma.remote_addr =
get_unaligned(&(ch->rc_target.rs_offset)) +
sgl_offset;
read_wr.sg_list = &sge[ch_sge_ary[ch_no].start];
read_wr.num_sge =
rdma_read_max_sge(xprt, ch_sge_ary[ch_no].count);
rdma_set_ctxt_sge(ctxt, &sge[ch_sge_ary[ch_no].start],
&sgl_offset,
read_wr.num_sge);
/* Post the read */
err = svc_rdma_send(xprt, &read_wr);
if (err) {
printk(KERN_ERR "svcrdma: Error posting send = %d\n",
err);
/*
* Break the circular list so free knows when
* to stop if the error happened to occur on
* the last read
*/
ctxt->next = NULL;
goto out;
}
atomic_inc(&rdma_stat_read);
/* SGEs left over for this chunk: post another read for the rest */
if (read_wr.num_sge < ch_sge_ary[ch_no].count) {
ch_sge_ary[ch_no].count -= read_wr.num_sge;
ch_sge_ary[ch_no].start += read_wr.num_sge;
goto next_sge;
}
sgl_offset = 0;
err = 0;
}
out:
svc_rdma_put_context(tmp_sge_ctxt, 0);
svc_rdma_put_context(tmp_ch_ctxt, 0);
/* Detach arg pages. svc_recv will replenish them */
for (ch_no = 0; &rqstp->rq_pages[ch_no] < rqstp->rq_respages; ch_no++)
rqstp->rq_pages[ch_no] = NULL;
/*
* Detach res pages. svc_release must see a resused count of
* zero or it will attempt to put them.
*/
while (rqstp->rq_resused)
rqstp->rq_respages[--rqstp->rq_resused] = NULL;
if (err) {
printk(KERN_ERR "svcrdma : RDMA_READ error = %d\n", err);
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
/* Free the linked list of read contexts */
while (head != NULL) {
ctxt = head->next;
svc_rdma_put_context(head, 1);
head = ctxt;
}
return 0;
}
return 1;
}
/*
* Finish a request whose RDMA_READs have completed: move the saved pages
* and arg state from the context into rqstp, release the chain of
* contexts, and return the number of bytes now available in rq_arg.
*
* data is the context taken off sc_read_complete_q by svc_rdma_recvfrom.
* data->next holds the saved pages and arg state that rdma_rcl_to_sge
* stored, which in rdma_read_xdr is the receive context linked after the
* last read context.
* NOTE(review): this presumably means data is always the context flagged
* RDMACTXT_F_LAST_CTXT -- confirm against the completion handler.
*/
static int rdma_read_complete(struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *data)
{
struct svc_rdma_op_ctxt *head = data->next;
int page_no;
int ret;
BUG_ON(!head);
/* Copy RPC pages */
for (page_no = 0; page_no < head->count; page_no++) {
put_page(rqstp->rq_pages[page_no]);
rqstp->rq_pages[page_no] = head->pages[page_no];
}
/* Point rq_arg.pages past header */
/* sge[0].length carries the header page count saved by rdma_rcl_to_sge */
rqstp->rq_arg.pages = &rqstp->rq_pages[head->sge[0].length];
rqstp->rq_arg.page_len = head->arg.page_len;
rqstp->rq_arg.page_base = head->arg.page_base;
/* rq_respages starts after the last arg page */
rqstp->rq_respages = &rqstp->rq_arg.pages[page_no];
rqstp->rq_resused = 0;
/* Rebuild rq_arg head and tail. */
rqstp->rq_arg.head[0] = head->arg.head[0];
rqstp->rq_arg.tail[0] = head->arg.tail[0];
rqstp->rq_arg.len = head->arg.len;
rqstp->rq_arg.buflen = head->arg.buflen;
/* XXX: What should this be? */
rqstp->rq_prot = IPPROTO_MAX;
/*
* Free the contexts we used to build the RDMA_READ. We have
* to be careful here because the context list uses the same
* next pointer used to chain the contexts associated with the
* RDMA_READ
*/
data->next = NULL; /* terminate circular list */
/* data is reused as the walk cursor from here on */
do {
data = head->next;
svc_rdma_put_context(head, 0);
head = data;
} while (head != NULL);
ret = rqstp->rq_arg.head[0].iov_len
+ rqstp->rq_arg.page_len
+ rqstp->rq_arg.tail[0].iov_len;
dprintk("svcrdma: deferred read ret=%d, rq_arg.len =%d, "
"rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len = %zd\n",
ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base,
rqstp->rq_arg.head[0].iov_len);
/* Indicate that we've consumed an RQ credit */
rqstp->rq_xprt_ctxt = rqstp->rq_xprt;
svc_xprt_received(rqstp->rq_xprt);
return ret;
}
/*
* Set up the rqstp thread context to point to the RQ buffer. If
* necessary, pull additional data from the client with an RDMA_READ
* request.
*
* Work is taken from two queues, in this order: sc_read_complete_q
* (requests whose RDMA_READs have finished) and sc_rq_dto_q (newly
* received messages).
*
* Returns the number of bytes available in rq_arg, or 0 when there is
* nothing to process, when the request was deferred pending RDMA_READ
* completion, or when the transport is being closed.
*/
int svc_rdma_recvfrom(struct svc_rqst *rqstp)
{
struct svc_xprt *xprt = rqstp->rq_xprt;
struct svcxprt_rdma *rdma_xprt =
container_of(xprt, struct svcxprt_rdma, sc_xprt);
struct svc_rdma_op_ctxt *ctxt = NULL;
struct rpcrdma_msg *rmsgp;
int ret = 0;
int len;
dprintk("svcrdma: rqstp=%p\n", rqstp);
/*
* The rq_xprt_ctxt indicates if we've consumed an RQ credit
* or not. It is used in the rdma xpo_release_rqst function to
* determine whether or not to return an RQ WQE to the RQ.
*/
rqstp->rq_xprt_ctxt = NULL;
/* Completed deferred reads take priority over new receives */
spin_lock_bh(&rdma_xprt->sc_read_complete_lock);
if (!list_empty(&rdma_xprt->sc_read_complete_q)) {
ctxt = list_entry(rdma_xprt->sc_read_complete_q.next,
struct svc_rdma_op_ctxt,
dto_q);
list_del_init(&ctxt->dto_q);
}
spin_unlock_bh(&rdma_xprt->sc_read_complete_lock);
if (ctxt)
return rdma_read_complete(rqstp, ctxt);
/* Otherwise take the next received message, if any */
spin_lock_bh(&rdma_xprt->sc_rq_dto_lock);
if (!list_empty(&rdma_xprt->sc_rq_dto_q)) {
ctxt = list_entry(rdma_xprt->sc_rq_dto_q.next,
struct svc_rdma_op_ctxt,
dto_q);
list_del_init(&ctxt->dto_q);
} else {
atomic_inc(&rdma_stat_rq_starve);
clear_bit(XPT_DATA, &xprt->xpt_flags);
ctxt = NULL;
}
spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock);
if (!ctxt) {
/* This is the EAGAIN path. The svc_recv routine will
* return -EAGAIN, the nfsd thread will go to call into
* svc_recv again and we shouldn't be on the active
* transport list
*/
if (test_bit(XPT_CLOSE, &xprt->xpt_flags))
goto close_out;
BUG_ON(ret);
goto out;
}
dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n",
ctxt, rdma_xprt, rqstp, ctxt->wc_status);
BUG_ON(ctxt->wc_status != IB_WC_SUCCESS);
atomic_inc(&rdma_stat_recv);
/* Build up the XDR from the receive buffers. */
rdma_build_arg_xdr(rqstp, ctxt, ctxt->byte_len);
/* Decode the RDMA header. */
/*
* NOTE(review): rmsgp is assigned only through the decoder's
* out-parameter. It is used below both after a -ENOSYS return and
* after any non-negative return, so the decoder must store it on
* those paths -- confirm.
*/
len = svc_rdma_xdr_decode_req(&rmsgp, rqstp);
rqstp->rq_xprt_hlen = len;
/* If the request is invalid, reply with an error */
if (len < 0) {
if (len == -ENOSYS)
(void)svc_rdma_send_error(rdma_xprt, rmsgp, ERR_VERS);
goto close_out;
}
/* Read read-list data. If we would need to wait, defer
* it. Note that in this case, we don't return the RQ credit
* until after the read completes.
*/
if (rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt)) {
svc_xprt_received(xprt);
return 0;
}
/* Indicate we've consumed an RQ credit */
rqstp->rq_xprt_ctxt = rqstp->rq_xprt;
ret = rqstp->rq_arg.head[0].iov_len
+ rqstp->rq_arg.page_len
+ rqstp->rq_arg.tail[0].iov_len;
/* Pages now belong to rqstp, so release only the context (flag 0) */
svc_rdma_put_context(ctxt, 0);
out:
dprintk("svcrdma: ret = %d, rq_arg.len =%d, "
"rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len = %zd\n",
ret, rqstp->rq_arg.len,
rqstp->rq_arg.head[0].iov_base,
rqstp->rq_arg.head[0].iov_len);
rqstp->rq_prot = IPPROTO_MAX;
svc_xprt_copy_addrs(rqstp, xprt);
svc_xprt_received(xprt);
return ret;
close_out:
if (ctxt) {
svc_rdma_put_context(ctxt, 1);
/* Indicate we've consumed an RQ credit */
rqstp->rq_xprt_ctxt = rqstp->rq_xprt;
}
dprintk("svcrdma: transport %p is closing\n", xprt);
/*
* Set the close bit and enqueue it. svc_recv will see the
* close bit and call svc_xprt_delete
*/
set_bit(XPT_CLOSE, &xprt->xpt_flags);
svc_xprt_received(xprt);
return 0;
}

View File

@ -0,0 +1,520 @@
/*
* Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the BSD-type
* license below:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* Neither the name of the Network Appliance, Inc. nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Author: Tom Tucker <tom@opengridcomputing.com>
*/
#include <linux/sunrpc/debug.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/spinlock.h>
#include <asm/unaligned.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <linux/sunrpc/svc_rdma.h>
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
/* Encode an XDR as an array of IB SGE
*
* Assumptions:
* - head[0] is physically contiguous.
* - tail[0] is physically contiguous.
* - pages[] is not physically or virtually contiguous and consists of
* PAGE_SIZE elements.
*
* Output:
* SGE[0] reserved for RPCRDMA header
* SGE[1] data from xdr->head[]
* SGE[2..sge_count-2] data from xdr->pages[]
* SGE[sge_count-1] data from xdr->tail.
*
* Parameters:
* - xprt: transport whose device and lkey are used for the DMA mappings
* - xdr: buffer to describe; xdr->len bytes are covered in total
* - sge: caller-supplied array to fill, starting at index 1
* - sge_count: out, number of entries in use including the reserved
* SGE[0]
*
* Returns the sge pointer that was passed in.
*/
static struct ib_sge *xdr_to_sge(struct svcxprt_rdma *xprt,
struct xdr_buf *xdr,
struct ib_sge *sge,
int *sge_count)
{
/* Max we need is the length of the XDR / pagesize + one for
* head + one for tail + one for RPCRDMA header
*/
int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3;
int sge_no;
u32 byte_count = xdr->len;
u32 sge_bytes;
u32 page_bytes;
int page_off;
int page_no;
/* Skip the first sge, this is for the RPCRDMA header */
sge_no = 1;
/* Head SGE */
/* the mapping covers all of iov_len; the SGE length is capped below */
sge[sge_no].addr = ib_dma_map_single(xprt->sc_cm_id->device,
xdr->head[0].iov_base,
xdr->head[0].iov_len,
DMA_TO_DEVICE);
sge_bytes = min_t(u32, byte_count, xdr->head[0].iov_len);
byte_count -= sge_bytes;
sge[sge_no].length = sge_bytes;
sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
sge_no++;
/* pages SGE */
page_no = 0;
page_bytes = xdr->page_len;
page_off = xdr->page_base;
/* One SGE per page; only the first page can start at a non-zero offset */
while (byte_count && page_bytes) {
sge_bytes = min_t(u32, byte_count, (PAGE_SIZE-page_off));
sge[sge_no].addr =
ib_dma_map_page(xprt->sc_cm_id->device,
xdr->pages[page_no], page_off,
sge_bytes, DMA_TO_DEVICE);
/* clamp to what is left of page_len after the mapping was sized */
sge_bytes = min(sge_bytes, page_bytes);
byte_count -= sge_bytes;
page_bytes -= sge_bytes;
sge[sge_no].length = sge_bytes;
sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
sge_no++;
page_no++;
page_off = 0; /* reset for next time through loop */
}
/* Tail SGE */
if (byte_count && xdr->tail[0].iov_len) {
sge[sge_no].addr =
ib_dma_map_single(xprt->sc_cm_id->device,
xdr->tail[0].iov_base,
xdr->tail[0].iov_len,
DMA_TO_DEVICE);
sge_bytes = min_t(u32, byte_count, xdr->tail[0].iov_len);
byte_count -= sge_bytes;
sge[sge_no].length = sge_bytes;
sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
sge_no++;
}
/* head, pages and tail together must account for every byte of xdr->len */
BUG_ON(sge_no > sge_max);
BUG_ON(byte_count != 0);
*sge_count = sge_no;
return sge;
}
/* Post a single RDMA_WRITE that moves write_len bytes of the reply,
 * beginning xdr_off bytes into the data described by
 * xdr_sge[1..sge_count-1], to remote address 'to' under rkey 'rmr'.
 * Slot 0 of xdr_sge is never used as a source.
 *
 * Assumptions:
 * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
 *
 * Returns 0 once the work request has been handed to svc_rdma_send(),
 * or -EIO if svc_rdma_send() reports a failure.
 */
static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
		      u32 rmr, u64 to,
		      u32 xdr_off, int write_len,
		      struct ib_sge *xdr_sge, int sge_count)
{
	struct svc_rdma_op_ctxt *wr_ctxt;	/* identifies the WR */
	struct svc_rdma_op_ctxt *scratch_ctxt;	/* lends us an sge array */
	struct ib_send_wr write_wr;
	struct ib_sge *wr_sge;
	int src_no = 1;
	int dst_no = 0;
	int src_off;
	int left;
	int seg_len;
	int ret = 0;

	BUG_ON(sge_count >= 32);
	dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, "
		"write_len=%d, xdr_sge=%p, sge_count=%d\n",
		rmr, to, xdr_off, write_len, xdr_sge, sge_count);

	wr_ctxt = svc_rdma_get_context(xprt);
	wr_ctxt->count = 0;
	scratch_ctxt = svc_rdma_get_context(xprt);
	wr_sge = scratch_ctxt->sge;

	/* Step over the source SGEs that end at or before xdr_off; what
	 * is left over is the starting offset inside the first SGE used.
	 */
	left = xdr_off;
	while (left && src_no < sge_count &&
	       xdr_sge[src_no].length <= left) {
		left -= xdr_sge[src_no].length;
		src_no++;
	}
	src_off = left;

	/* Build the gather list for this WRITE from the remaining SGEs */
	for (left = write_len; left != 0 && src_no < sge_count;
	     src_no++, dst_no++) {
		seg_len = min((size_t)left,
			      (size_t)(xdr_sge[src_no].length-src_off));
		wr_sge[dst_no].addr = xdr_sge[src_no].addr + src_off;
		wr_sge[dst_no].lkey = xdr_sge[src_no].lkey;
		wr_sge[dst_no].length = seg_len;
		left -= seg_len;
		src_off = 0;	/* only the first SGE starts part way in */
	}
	BUG_ON(left != 0);
	BUG_ON(src_no > sge_count);

	/* Describe the WRITE work request */
	memset(&write_wr, 0, sizeof(write_wr));
	write_wr.opcode = IB_WR_RDMA_WRITE;
	write_wr.send_flags = IB_SEND_SIGNALED;
	write_wr.wr_id = (unsigned long)wr_ctxt;
	write_wr.sg_list = wr_sge;
	write_wr.num_sge = dst_no;
	write_wr.wr.rdma.rkey = rmr;
	write_wr.wr.rdma.remote_addr = to;
	wr_ctxt->wr_op = IB_WR_RDMA_WRITE;

	/* Hand it to the transport */
	atomic_inc(&rdma_stat_write);
	if (svc_rdma_send(xprt, &write_wr)) {
		/* Fatal error, close transport */
		svc_rdma_put_context(wr_ctxt, 1);
		ret = -EIO;
	}

	/* The borrowed sge array is released on both paths */
	svc_rdma_put_context(scratch_ctxt, 0);
	return ret;
}
/* RDMA_WRITE the page list and tail of rqstp->rq_res into the chunks of
 * the write array that svc_rdma_get_write_array() finds in rdma_argp,
 * and record each chunk used in the write list of rdma_resp.
 *
 * Each chunk is written in pieces of at most sc_max_sge * PAGE_SIZE
 * bytes.
 *
 * Returns 0 when rdma_argp carries no write array, -EIO when a write
 * cannot be posted, and otherwise
 * rq_res.page_len + rq_res.tail[0].iov_len.
 */
static int send_write_chunks(struct svcxprt_rdma *xprt,
			     struct rpcrdma_msg *rdma_argp,
			     struct rpcrdma_msg *rdma_resp,
			     struct svc_rqst *rqstp,
			     struct ib_sge *sge,
			     int sge_count)
{
	struct rpcrdma_write_array *req_ary;
	struct rpcrdma_write_array *rsp_ary;
	u32 bytes_left = rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len;
	u32 xdr_off;
	int max_write;
	int chunk_no;
	int ret;

	req_ary = svc_rdma_get_write_array(rdma_argp);
	if (!req_ary)
		return 0;

	rsp_ary = (struct rpcrdma_write_array *)
		&rdma_resp->rm_body.rm_chunks[1];
	max_write = xprt->sc_max_sge * PAGE_SIZE;

	/* Write chunks start at the pagelist, i.e. just past the head */
	xdr_off = rqstp->rq_res.head[0].iov_len;
	chunk_no = 0;
	while (bytes_left && chunk_no < req_ary->wc_nchunks) {
		struct rpcrdma_segment *seg =
			&req_ary->wc_array[chunk_no].wc_target;
		int write_len = min(bytes_left, seg->rs_length);
		u64 rs_offset = get_unaligned(&(seg->rs_offset));
		int chunk_off = 0;

		/* Tell the client how much of this chunk gets filled */
		svc_rdma_xdr_encode_array_chunk(rsp_ary, chunk_no,
						seg->rs_handle,
						rs_offset,
						write_len);

		/* Fill the chunk, max_write bytes at a time */
		while (write_len) {
			int this_write = min(write_len, max_write);

			ret = send_write(xprt, rqstp,
					 seg->rs_handle,
					 rs_offset + chunk_off,
					 xdr_off,
					 this_write,
					 sge,
					 sge_count);
			if (ret) {
				dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n",
					ret);
				return -EIO;
			}
			write_len -= this_write;
			bytes_left -= this_write;
			xdr_off += this_write;
			chunk_off += this_write;
		}
		chunk_no++;
	}

	/* Update the req with the number of chunks actually used */
	svc_rdma_xdr_encode_write_list(rdma_resp, chunk_no);

	return rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len;
}
/* RDMA_WRITE the whole of rqstp->rq_res (starting at XDR offset 0) into
 * the chunks of the reply array that svc_rdma_get_reply_array() finds
 * in rdma_argp, and record each chunk used in the reply array of
 * rdma_resp.
 *
 * Each chunk is written in pieces of at most sc_max_sge * PAGE_SIZE
 * bytes.
 *
 * Returns 0 when rdma_argp carries no reply array, -EIO when a write
 * cannot be posted, and otherwise rq_res.len.
 */
static int send_reply_chunks(struct svcxprt_rdma *xprt,
			     struct rpcrdma_msg *rdma_argp,
			     struct rpcrdma_msg *rdma_resp,
			     struct svc_rqst *rqstp,
			     struct ib_sge *sge,
			     int sge_count)
{
	struct rpcrdma_write_array *req_ary;
	struct rpcrdma_write_array *rsp_ary;
	struct rpcrdma_segment *seg;
	u32 bytes_left = rqstp->rq_res.len;
	u32 xdr_off = 0;	/* xdr offset starts at RPC message */
	int chunk_no = 0;
	int max_write;
	int write_len;
	int chunk_off;
	int ret;

	req_ary = svc_rdma_get_reply_array(rdma_argp);
	if (!req_ary)
		return 0;

	/* XXX: need to fix when reply lists occur with read-list and or
	 * write-list */
	rsp_ary = (struct rpcrdma_write_array *)
		&rdma_resp->rm_body.rm_chunks[2];
	max_write = xprt->sc_max_sge * PAGE_SIZE;

	while (bytes_left && chunk_no < req_ary->wc_nchunks) {
		u64 rs_offset;

		seg = &req_ary->wc_array[chunk_no].wc_target;
		write_len = min(bytes_left, seg->rs_length);
		rs_offset = get_unaligned(&(seg->rs_offset));

		/* Record in the response how much of this chunk is used */
		svc_rdma_xdr_encode_array_chunk(rsp_ary, chunk_no,
						seg->rs_handle, rs_offset,
						write_len);

		/* Fill the chunk, max_write bytes at a time */
		for (chunk_off = 0; write_len; ) {
			int this_write = min(write_len, max_write);

			ret = send_write(xprt, rqstp,
					 seg->rs_handle,
					 rs_offset + chunk_off,
					 xdr_off,
					 this_write,
					 sge,
					 sge_count);
			if (ret) {
				dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n",
					ret);
				return -EIO;
			}
			write_len -= this_write;
			bytes_left -= this_write;
			xdr_off += this_write;
			chunk_off += this_write;
		}
		chunk_no++;
	}

	/* Update the req with the number of chunks actually used */
	svc_rdma_xdr_encode_reply_array(rsp_ary, chunk_no);

	return rqstp->rq_res.len;
}
/* This function prepares the portion of the RPCRDMA message to be
 * sent in the RDMA_SEND. This function is called after data sent via
 * RDMA has already been transmitted. There are three cases:
 * - The RPCRDMA header, RPC header, and payload are all sent in a
 *   single RDMA_SEND. This is the "inline" case.
 * - The RPCRDMA header and some portion of the RPC header and data
 *   are sent via this RDMA_SEND and another portion of the data is
 *   sent via RDMA.
 * - The RPCRDMA header [NOMSG] is sent in this RDMA_SEND and the RPC
 *   header and data are all transmitted via RDMA.
 * In all three cases, this function prepares the RPCRDMA header in
 * sge[0], and the 'byte_count' parameter indicates how much of
 * the XDR to include in this RDMA_SEND.
 *
 * 'page' holds the already encoded RPCRDMA header (rdma_resp) and
 * becomes ctxt->pages[0].  ctxt->sge[1..sge_count-1] must already
 * describe the XDR; only the leading SGEs needed to cover byte_count
 * are posted, and the last of them is trimmed so that the SEND carries
 * exactly byte_count bytes of XDR.
 *
 * The first rq_resused entries of rqstp->rq_respages are moved into
 * the context and cleared in rqstp.
 *
 * Returns the result of svc_rdma_send(); on failure the context is
 * released with svc_rdma_put_context(ctxt, 1) before returning.
 */
static int send_reply(struct svcxprt_rdma *rdma,
		      struct svc_rqst *rqstp,
		      struct page *page,
		      struct rpcrdma_msg *rdma_resp,
		      struct svc_rdma_op_ctxt *ctxt,
		      int sge_count,
		      int byte_count)
{
	struct ib_send_wr send_wr;
	int sge_no;
	int sge_bytes;
	int page_no;
	int ret;

	/* Prepare the context */
	ctxt->pages[0] = page;
	ctxt->count = 1;

	/* Prepare the SGE for the RPCRDMA Header */
	ctxt->sge[0].addr =
		ib_dma_map_page(rdma->sc_cm_id->device,
				page, 0, PAGE_SIZE, DMA_TO_DEVICE);
	ctxt->direction = DMA_TO_DEVICE;
	ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
	ctxt->sge[0].lkey = rdma->sc_phys_mr->lkey;

	/* Determine how many of our SGE are to be transmitted.  If
	 * byte_count runs out inside an SGE, shorten that SGE so that
	 * no bytes beyond byte_count go out with this SEND; when an SGE
	 * is consumed whole the assignment leaves its length unchanged.
	 */
	for (sge_no = 1; byte_count && sge_no < sge_count; sge_no++) {
		sge_bytes = min((size_t)ctxt->sge[sge_no].length,
				(size_t)byte_count);
		ctxt->sge[sge_no].length = sge_bytes;
		byte_count -= sge_bytes;
	}
	BUG_ON(byte_count != 0);

	/* Save all respages in the ctxt and remove them from the
	 * respages array. They are our pages until the I/O
	 * completes.
	 */
	for (page_no = 0; page_no < rqstp->rq_resused; page_no++) {
		ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
		ctxt->count++;
		rqstp->rq_respages[page_no] = NULL;
	}

	BUG_ON(sge_no > rdma->sc_max_sge);
	memset(&send_wr, 0, sizeof send_wr);
	ctxt->wr_op = IB_WR_SEND;
	send_wr.wr_id = (unsigned long)ctxt;
	send_wr.sg_list = ctxt->sge;
	send_wr.num_sge = sge_no;
	send_wr.opcode = IB_WR_SEND;
	send_wr.send_flags = IB_SEND_SIGNALED;

	ret = svc_rdma_send(rdma, &send_wr);
	if (ret)
		svc_rdma_put_context(ctxt, 1);

	return ret;
}
/* Empty hook: nothing is done to the reply header of rqstp here.
 * NOTE(review): presumably this exists only so the transport has a
 * "prepare reply header" operation to register -- confirm against the
 * transport ops table, which is not visible in this file section.
 */
void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp)
{
}
/*
 * Return the start of an xdr buffer: the address that precedes
 * head[0].iov_base by however many bytes of xdr->len are not accounted
 * for by the head, page list and tail lengths.
 */
static void *xdr_start(struct xdr_buf *xdr)
{
	/* bytes counted in xdr->len that sit in front of head[0] */
	size_t lead_bytes = xdr->len -
			    xdr->page_len -
			    xdr->tail[0].iov_len -
			    xdr->head[0].iov_len;

	return (char *)xdr->head[0].iov_base - lead_bytes;
}
/* Send the reply in rqstp->rq_res over the RDMA transport.
 *
 * The reply XDR is first described as an SGE list, a fresh page is
 * set up as the RPCRDMA response header, any write-chunk and
 * reply-chunk data is pushed with RDMA_WRITE, and whatever is left
 * over goes out inline with the header in a final RDMA_SEND.
 *
 * The header type is RDMA_NOMSG when the request carries a reply
 * array and RDMA_MSG otherwise.
 *
 * Returns the result of send_reply(), or the negative value returned
 * by send_write_chunks()/send_reply_chunks(), in which case the
 * context and the response header page are released here.
 */
int svc_rdma_sendto(struct svc_rqst *rqstp)
{
	struct svcxprt_rdma *rdma =
		container_of(rqstp->rq_xprt, struct svcxprt_rdma, sc_xprt);
	struct rpcrdma_msg *req_hdr;
	struct rpcrdma_msg *rsp_hdr;
	struct svc_rdma_op_ctxt *ctxt;
	struct page *hdr_page;
	struct ib_sge *sge;
	enum rpcrdma_proc reply_type;
	int sge_count = 0;
	int inline_bytes;
	int ret;

	dprintk("svcrdma: sending response for rqstp=%p\n", rqstp);

	/* Get the RDMA request header. */
	req_hdr = xdr_start(&rqstp->rq_arg);

	/* Build an SGE for the XDR */
	ctxt = svc_rdma_get_context(rdma);
	ctxt->direction = DMA_TO_DEVICE;
	sge = xdr_to_sge(rdma, &rqstp->rq_res, ctxt->sge, &sge_count);

	/* Everything is inline until some of it is sent via RDMA */
	inline_bytes = rqstp->rq_res.len;

	/* Create the RDMA response header */
	hdr_page = svc_rdma_get_page();
	rsp_hdr = page_address(hdr_page);
	reply_type = svc_rdma_get_reply_array(req_hdr) ?
			RDMA_NOMSG : RDMA_MSG;
	svc_rdma_xdr_encode_reply_header(rdma, req_hdr,
					 rsp_hdr, reply_type);

	/* Send any write-chunk data and build resp write-list */
	ret = send_write_chunks(rdma, req_hdr, rsp_hdr,
				rqstp, sge, sge_count);
	if (ret < 0) {
		printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n",
		       ret);
		goto err_release;
	}
	inline_bytes -= ret;

	/* Send any reply-list data and update resp reply-list */
	ret = send_reply_chunks(rdma, req_hdr, rsp_hdr,
				rqstp, sge, sge_count);
	if (ret < 0) {
		printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n",
		       ret);
		goto err_release;
	}
	inline_bytes -= ret;

	/* send_reply() releases the context itself if the send fails */
	ret = send_reply(rdma, rqstp, hdr_page, rsp_hdr, ctxt, sge_count,
			 inline_bytes);
	dprintk("svcrdma: send_reply returns %d\n", ret);
	return ret;

err_release:
	svc_rdma_put_context(ctxt, 0);
	put_page(hdr_page);
	return ret;
}

File diff suppressed because it is too large Load Diff