Highlights:

- Fix EXCHANGE_ID response when NFSD runs in a container
- A battery of new static trace points
- Socket transports now use bio_vec to send Replies
- NFS/RDMA now supports filesystems with no .splice_read method
- Favor memcpy() over DMA mapping for small RPC/RDMA Replies
- Add pre-requisites for supporting multiple Write chunks
- Numerous minor fixes and clean-ups
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2.0.22 (GNU/Linux)
 
 iQIcBAABAgAGBQJegj9pAAoJEDNqszNvZn+XNGgP/RsRul/UGe70YoPS6AwxI+c1
 2JVni5LV83aVGSN1df/xRNdugWh4j8e8stBIJPCnWFzUERFvrzVeVyW0/dlIy37l
 SRL1L62EzFUejAL45O+CkF5+KI2kAWMgDCv+rPnFnIuXVa/sThx63F1AJikVMPjB
 7We3vd5Kh/CrMeMflebJYuY12xE6di2b3ifkZRO0/yuMaAuqJrDreYf4L6xpA4rC
 QnKQcNl7LGlOwGSI2WvDrCLE056PJFhTuzTawI80NKnkXMMFNc6/7NoXJqasVlHG
 fiki2mHbJrbYd8isIm3Vl/QkFsM8QjijtpVxC9gd151w0P7DfpMYmSzlZL7nvq/R
 Nt6IIqbaxWSS1VULsuS7rDtBwwZpW/LRWaUhEvMwimR2jeOxcwtlDVTX/dRH2mxq
 Ume64Hn8xMEhhx9tHCPQ+Rgjqv5m+ZEAvmV6B7RM9nT2z2MSzQQESeMB14VZZmF/
 2oH1dDCVdCmb4ZOcD5yxL6Y1hijn45s+YHdts9uIsCudKYPI906vPhogFC+PVJv+
 MrOiUf8d40H0ra8VAUFCjAceOulkv90aLhBjoHbPsP4SQOTsRuUXnsKESZpSHY72
 nT/uPM23ULv4kQ6tHB8yQ3ordjCBRgb4zIKtotc3Wpi7dhO8u6ptPj4soiflRShO
 8/3N5dYfqdt9FRyr7Z8/
 =o5G0
 -----END PGP SIGNATURE-----

Merge tag 'nfsd-5.7' of git://git.linux-nfs.org/projects/cel/cel-2.6

Pull nfsd updates from Chuck Lever:

 - Fix EXCHANGE_ID response when NFSD runs in a container

 - A battery of new static trace points

 - Socket transports now use bio_vec to send Replies

 - NFS/RDMA now supports filesystems with no .splice_read method

 - Favor memcpy() over DMA mapping for small RPC/RDMA Replies

 - Add pre-requisites for supporting multiple Write chunks

 - Numerous minor fixes and clean-ups

[ Chuck is filling in for Bruce this time while he and his family settle
  into a new house ]

* tag 'nfsd-5.7' of git://git.linux-nfs.org/projects/cel/cel-2.6: (39 commits)
  svcrdma: Fix leak of transport addresses
  SUNRPC: Fix a potential buffer overflow in 'svc_print_xprts()'
  SUNRPC/cache: don't allow invalid entries to be flushed
  nfsd: fsnotify on rmdir under nfsd/clients/
  nfsd4: kill warnings on testing stateids with mismatched clientids
  nfsd: remove read permission bit for ctl sysctl
  NFSD: Fix NFS server build errors
  sunrpc: Add tracing for cache events
  SUNRPC/cache: Allow garbage collection of invalid cache entries
  nfsd: export upcalls must not return ESTALE when mountd is down
  nfsd: Add tracepoints for update of the expkey and export cache entries
  nfsd: Add tracepoints for exp_find_key() and exp_get_by_name()
  nfsd: Add tracing to nfsd_set_fh_dentry()
  nfsd: Don't add locks to closed or closing open stateids
  SUNRPC: Teach server to use xprt_sock_sendmsg for socket sends
  SUNRPC: Refactor xs_sendpages()
  svcrdma: Avoid DMA mapping small RPC Replies
  svcrdma: Fix double sync of transport header buffer
  svcrdma: Refactor chunk list encoders
  SUNRPC: Add encoders for list item discriminators
  ...
This commit is contained in:
Linus Torvalds 2020-04-04 11:13:51 -07:00
commit b3d8e42282
40 changed files with 1420 additions and 922 deletions

View File

@ -152,12 +152,13 @@ static int nfs_dns_upcall(struct cache_detail *cd,
struct cache_head *ch) struct cache_head *ch)
{ {
struct nfs_dns_ent *key = container_of(ch, struct nfs_dns_ent, h); struct nfs_dns_ent *key = container_of(ch, struct nfs_dns_ent, h);
int ret;
ret = nfs_cache_upcall(cd, key->hostname); if (test_and_set_bit(CACHE_PENDING, &ch->flags))
if (ret) return 0;
ret = sunrpc_cache_pipe_upcall(cd, ch); if (!nfs_cache_upcall(cd, key->hostname))
return ret; return 0;
clear_bit(CACHE_PENDING, &ch->flags);
return sunrpc_cache_pipe_upcall_timeout(cd, ch);
} }
static int nfs_dns_match(struct cache_head *ca, static int nfs_dns_match(struct cache_head *ca,

View File

@ -136,7 +136,7 @@ config NFSD_FLEXFILELAYOUT
config NFSD_V4_2_INTER_SSC config NFSD_V4_2_INTER_SSC
bool "NFSv4.2 inter server to server COPY" bool "NFSv4.2 inter server to server COPY"
depends on NFSD_V4 && NFS_V4_1 && NFS_V4_2 depends on NFSD_V4 && NFS_V4_1 && NFS_V4_2 && NFS_FS=y
help help
This option enables support for NFSv4.2 inter server to This option enables support for NFSv4.2 inter server to
server copy where the destination server calls the NFSv4.2 server copy where the destination server calls the NFSv4.2

View File

@ -23,6 +23,7 @@
#include "netns.h" #include "netns.h"
#include "pnfs.h" #include "pnfs.h"
#include "filecache.h" #include "filecache.h"
#include "trace.h"
#define NFSDDBG_FACILITY NFSDDBG_EXPORT #define NFSDDBG_FACILITY NFSDDBG_EXPORT
@ -50,6 +51,11 @@ static void expkey_put(struct kref *ref)
kfree_rcu(key, ek_rcu); kfree_rcu(key, ek_rcu);
} }
/*
 * Upcall handler installed as .cache_upcall in svc_expkey_cache_template.
 * Passes the cache detail and entry straight through to
 * sunrpc_cache_pipe_upcall() and returns that function's result.
 */
static int expkey_upcall(struct cache_detail *cd, struct cache_head *h)
{
return sunrpc_cache_pipe_upcall(cd, h);
}
static void expkey_request(struct cache_detail *cd, static void expkey_request(struct cache_detail *cd,
struct cache_head *h, struct cache_head *h,
char **bpp, int *blen) char **bpp, int *blen)
@ -140,7 +146,9 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
if (len == 0) { if (len == 0) {
set_bit(CACHE_NEGATIVE, &key.h.flags); set_bit(CACHE_NEGATIVE, &key.h.flags);
ek = svc_expkey_update(cd, &key, ek); ek = svc_expkey_update(cd, &key, ek);
if (!ek) if (ek)
trace_nfsd_expkey_update(ek, NULL);
else
err = -ENOMEM; err = -ENOMEM;
} else { } else {
err = kern_path(buf, 0, &key.ek_path); err = kern_path(buf, 0, &key.ek_path);
@ -150,7 +158,9 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
dprintk("Found the path %s\n", buf); dprintk("Found the path %s\n", buf);
ek = svc_expkey_update(cd, &key, ek); ek = svc_expkey_update(cd, &key, ek);
if (!ek) if (ek)
trace_nfsd_expkey_update(ek, buf);
else
err = -ENOMEM; err = -ENOMEM;
path_put(&key.ek_path); path_put(&key.ek_path);
} }
@ -249,6 +259,7 @@ static const struct cache_detail svc_expkey_cache_template = {
.hash_size = EXPKEY_HASHMAX, .hash_size = EXPKEY_HASHMAX,
.name = "nfsd.fh", .name = "nfsd.fh",
.cache_put = expkey_put, .cache_put = expkey_put,
.cache_upcall = expkey_upcall,
.cache_request = expkey_request, .cache_request = expkey_request,
.cache_parse = expkey_parse, .cache_parse = expkey_parse,
.cache_show = expkey_show, .cache_show = expkey_show,
@ -330,6 +341,11 @@ static void svc_export_put(struct kref *ref)
kfree_rcu(exp, ex_rcu); kfree_rcu(exp, ex_rcu);
} }
/*
 * Upcall handler installed as .cache_upcall in svc_export_cache_template.
 * Passes the cache detail and entry straight through to
 * sunrpc_cache_pipe_upcall() and returns that function's result.
 */
static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h)
{
return sunrpc_cache_pipe_upcall(cd, h);
}
static void svc_export_request(struct cache_detail *cd, static void svc_export_request(struct cache_detail *cd,
struct cache_head *h, struct cache_head *h,
char **bpp, int *blen) char **bpp, int *blen)
@ -643,15 +659,17 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
} }
expp = svc_export_lookup(&exp); expp = svc_export_lookup(&exp);
if (expp) if (!expp) {
expp = svc_export_update(&exp, expp);
else
err = -ENOMEM; err = -ENOMEM;
cache_flush(); goto out4;
if (expp == NULL) }
err = -ENOMEM; expp = svc_export_update(&exp, expp);
else if (expp) {
trace_nfsd_export_update(expp);
cache_flush();
exp_put(expp); exp_put(expp);
} else
err = -ENOMEM;
out4: out4:
nfsd4_fslocs_free(&exp.ex_fslocs); nfsd4_fslocs_free(&exp.ex_fslocs);
kfree(exp.ex_uuid); kfree(exp.ex_uuid);
@ -767,6 +785,7 @@ static const struct cache_detail svc_export_cache_template = {
.hash_size = EXPORT_HASHMAX, .hash_size = EXPORT_HASHMAX,
.name = "nfsd.export", .name = "nfsd.export",
.cache_put = svc_export_put, .cache_put = svc_export_put,
.cache_upcall = svc_export_upcall,
.cache_request = svc_export_request, .cache_request = svc_export_request,
.cache_parse = svc_export_parse, .cache_parse = svc_export_parse,
.cache_show = svc_export_show, .cache_show = svc_export_show,
@ -832,8 +851,10 @@ exp_find_key(struct cache_detail *cd, struct auth_domain *clp, int fsid_type,
if (ek == NULL) if (ek == NULL)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
err = cache_check(cd, &ek->h, reqp); err = cache_check(cd, &ek->h, reqp);
if (err) if (err) {
trace_nfsd_exp_find_key(&key, err);
return ERR_PTR(err); return ERR_PTR(err);
}
return ek; return ek;
} }
@ -855,8 +876,10 @@ exp_get_by_name(struct cache_detail *cd, struct auth_domain *clp,
if (exp == NULL) if (exp == NULL)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
err = cache_check(cd, &exp->h, reqp); err = cache_check(cd, &exp->h, reqp);
if (err) if (err) {
trace_nfsd_exp_get_by_name(&key, err);
return ERR_PTR(err); return ERR_PTR(err);
}
return exp; return exp;
} }

View File

@ -890,7 +890,7 @@ nfsd_file_find_locked(struct inode *inode, unsigned int may_flags,
unsigned char need = may_flags & NFSD_FILE_MAY_MASK; unsigned char need = may_flags & NFSD_FILE_MAY_MASK;
hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head, hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
nf_node) { nf_node, lockdep_is_held(&nfsd_file_hashtbl[hashval].nfb_lock)) {
if ((need & nf->nf_may) != need) if ((need & nf->nf_may) != need)
continue; continue;
if (nf->nf_inode != inode) if (nf->nf_inode != inode)

View File

@ -172,6 +172,8 @@ struct nfsd_net {
unsigned int longest_chain_cachesize; unsigned int longest_chain_cachesize;
struct shrinker nfsd_reply_cache_shrinker; struct shrinker nfsd_reply_cache_shrinker;
/* utsname taken from the process that starts the server */
char nfsd_name[UNX_MAXNODENAME+1];
}; };
/* Simple check to find out if a given net was properly initialized */ /* Simple check to find out if a given net was properly initialized */

View File

@ -122,6 +122,12 @@ idtoname_hash(struct ent *ent)
return hash; return hash;
} }
/*
 * Upcall handler installed as .cache_upcall in idtoname_cache_template.
 * Unlike the export caches, this one delegates to the _timeout variant,
 * sunrpc_cache_pipe_upcall_timeout(), and returns its result.
 * NOTE(review): the timeout semantics live in the sunrpc cache code and
 * are not visible here -- confirm before relying on them.
 */
static int
idtoname_upcall(struct cache_detail *cd, struct cache_head *h)
{
return sunrpc_cache_pipe_upcall_timeout(cd, h);
}
static void static void
idtoname_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, idtoname_request(struct cache_detail *cd, struct cache_head *ch, char **bpp,
int *blen) int *blen)
@ -184,6 +190,7 @@ static const struct cache_detail idtoname_cache_template = {
.hash_size = ENT_HASHMAX, .hash_size = ENT_HASHMAX,
.name = "nfs4.idtoname", .name = "nfs4.idtoname",
.cache_put = ent_put, .cache_put = ent_put,
.cache_upcall = idtoname_upcall,
.cache_request = idtoname_request, .cache_request = idtoname_request,
.cache_parse = idtoname_parse, .cache_parse = idtoname_parse,
.cache_show = idtoname_show, .cache_show = idtoname_show,
@ -295,6 +302,12 @@ nametoid_hash(struct ent *ent)
return hash_str(ent->name, ENT_HASHBITS); return hash_str(ent->name, ENT_HASHBITS);
} }
/*
 * Upcall handler installed as .cache_upcall in nametoid_cache_template.
 * Delegates to sunrpc_cache_pipe_upcall_timeout() and returns its result.
 * NOTE(review): the timeout semantics live in the sunrpc cache code and
 * are not visible here -- confirm before relying on them.
 */
static int
nametoid_upcall(struct cache_detail *cd, struct cache_head *h)
{
return sunrpc_cache_pipe_upcall_timeout(cd, h);
}
static void static void
nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp,
int *blen) int *blen)
@ -347,6 +360,7 @@ static const struct cache_detail nametoid_cache_template = {
.hash_size = ENT_HASHMAX, .hash_size = ENT_HASHMAX,
.name = "nfs4.nametoid", .name = "nfs4.nametoid",
.cache_put = ent_put, .cache_put = ent_put,
.cache_upcall = nametoid_upcall,
.cache_request = nametoid_request, .cache_request = nametoid_request,
.cache_parse = nametoid_parse, .cache_parse = nametoid_parse,
.cache_show = nametoid_show, .cache_show = nametoid_show,

View File

@ -494,6 +494,8 @@ find_any_file(struct nfs4_file *f)
{ {
struct nfsd_file *ret; struct nfsd_file *ret;
if (!f)
return NULL;
spin_lock(&f->fi_lock); spin_lock(&f->fi_lock);
ret = __nfs4_get_fd(f, O_RDWR); ret = __nfs4_get_fd(f, O_RDWR);
if (!ret) { if (!ret) {
@ -1309,6 +1311,12 @@ static void nfs4_put_stateowner(struct nfs4_stateowner *sop)
nfs4_free_stateowner(sop); nfs4_free_stateowner(sop);
} }
/*
 * Test whether @stp has been unhashed.
 *
 * Returns true when the stateid's st_perfile list linkage is empty,
 * false otherwise. The stateid itself is not modified.
 */
static bool
nfs4_ol_stateid_unhashed(const struct nfs4_ol_stateid *stp)
{
return list_empty(&stp->st_perfile);
}
static bool unhash_ol_stateid(struct nfs4_ol_stateid *stp) static bool unhash_ol_stateid(struct nfs4_ol_stateid *stp)
{ {
struct nfs4_file *fp = stp->st_stid.sc_file; struct nfs4_file *fp = stp->st_stid.sc_file;
@ -1379,9 +1387,11 @@ static bool unhash_lock_stateid(struct nfs4_ol_stateid *stp)
{ {
lockdep_assert_held(&stp->st_stid.sc_client->cl_lock); lockdep_assert_held(&stp->st_stid.sc_client->cl_lock);
if (!unhash_ol_stateid(stp))
return false;
list_del_init(&stp->st_locks); list_del_init(&stp->st_locks);
nfs4_unhash_stid(&stp->st_stid); nfs4_unhash_stid(&stp->st_stid);
return unhash_ol_stateid(stp); return true;
} }
static void release_lock_stateid(struct nfs4_ol_stateid *stp) static void release_lock_stateid(struct nfs4_ol_stateid *stp)
@ -1446,13 +1456,12 @@ static void release_open_stateid_locks(struct nfs4_ol_stateid *open_stp,
static bool unhash_open_stateid(struct nfs4_ol_stateid *stp, static bool unhash_open_stateid(struct nfs4_ol_stateid *stp,
struct list_head *reaplist) struct list_head *reaplist)
{ {
bool unhashed;
lockdep_assert_held(&stp->st_stid.sc_client->cl_lock); lockdep_assert_held(&stp->st_stid.sc_client->cl_lock);
unhashed = unhash_ol_stateid(stp); if (!unhash_ol_stateid(stp))
return false;
release_open_stateid_locks(stp, reaplist); release_open_stateid_locks(stp, reaplist);
return unhashed; return true;
} }
static void release_open_stateid(struct nfs4_ol_stateid *stp) static void release_open_stateid(struct nfs4_ol_stateid *stp)
@ -2636,7 +2645,7 @@ static const struct file_operations client_ctl_fops = {
static const struct tree_descr client_files[] = { static const struct tree_descr client_files[] = {
[0] = {"info", &client_info_fops, S_IRUSR}, [0] = {"info", &client_info_fops, S_IRUSR},
[1] = {"states", &client_states_fops, S_IRUSR}, [1] = {"states", &client_states_fops, S_IRUSR},
[2] = {"ctl", &client_ctl_fops, S_IRUSR|S_IWUSR}, [2] = {"ctl", &client_ctl_fops, S_IWUSR},
[3] = {""}, [3] = {""},
}; };
@ -4343,7 +4352,8 @@ find_file_locked(struct knfsd_fh *fh, unsigned int hashval)
{ {
struct nfs4_file *fp; struct nfs4_file *fp;
hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash) { hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash,
lockdep_is_held(&state_lock)) {
if (fh_match(&fp->fi_fhandle, fh)) { if (fh_match(&fp->fi_fhandle, fh)) {
if (refcount_inc_not_zero(&fp->fi_ref)) if (refcount_inc_not_zero(&fp->fi_ref))
return fp; return fp;
@ -5521,15 +5531,8 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) || if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
CLOSE_STATEID(stateid)) CLOSE_STATEID(stateid))
return status; return status;
/* Client debugging aid. */ if (!same_clid(&stateid->si_opaque.so_clid, &cl->cl_clientid))
if (!same_clid(&stateid->si_opaque.so_clid, &cl->cl_clientid)) {
char addr_str[INET6_ADDRSTRLEN];
rpc_ntop((struct sockaddr *)&cl->cl_addr, addr_str,
sizeof(addr_str));
pr_warn_ratelimited("NFSD: client %s testing state ID "
"with incorrect client ID\n", addr_str);
return status; return status;
}
spin_lock(&cl->cl_lock); spin_lock(&cl->cl_lock);
s = find_stateid_locked(cl, stateid); s = find_stateid_locked(cl, stateid);
if (!s) if (!s)
@ -6393,21 +6396,21 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp,
} }
static struct nfs4_ol_stateid * static struct nfs4_ol_stateid *
find_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp) find_lock_stateid(const struct nfs4_lockowner *lo,
const struct nfs4_ol_stateid *ost)
{ {
struct nfs4_ol_stateid *lst; struct nfs4_ol_stateid *lst;
struct nfs4_client *clp = lo->lo_owner.so_client;
lockdep_assert_held(&clp->cl_lock); lockdep_assert_held(&ost->st_stid.sc_client->cl_lock);
list_for_each_entry(lst, &lo->lo_owner.so_stateids, st_perstateowner) { /* If ost is not hashed, ost->st_locks will not be valid */
if (lst->st_stid.sc_type != NFS4_LOCK_STID) if (!nfs4_ol_stateid_unhashed(ost))
continue; list_for_each_entry(lst, &ost->st_locks, st_locks) {
if (lst->st_stid.sc_file == fp) { if (lst->st_stateowner == &lo->lo_owner) {
refcount_inc(&lst->st_stid.sc_count); refcount_inc(&lst->st_stid.sc_count);
return lst; return lst;
}
} }
}
return NULL; return NULL;
} }
@ -6423,11 +6426,11 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo,
mutex_lock_nested(&stp->st_mutex, OPEN_STATEID_MUTEX); mutex_lock_nested(&stp->st_mutex, OPEN_STATEID_MUTEX);
retry: retry:
spin_lock(&clp->cl_lock); spin_lock(&clp->cl_lock);
spin_lock(&fp->fi_lock); if (nfs4_ol_stateid_unhashed(open_stp))
retstp = find_lock_stateid(lo, fp); goto out_close;
retstp = find_lock_stateid(lo, open_stp);
if (retstp) if (retstp)
goto out_unlock; goto out_found;
refcount_inc(&stp->st_stid.sc_count); refcount_inc(&stp->st_stid.sc_count);
stp->st_stid.sc_type = NFS4_LOCK_STID; stp->st_stid.sc_type = NFS4_LOCK_STID;
stp->st_stateowner = nfs4_get_stateowner(&lo->lo_owner); stp->st_stateowner = nfs4_get_stateowner(&lo->lo_owner);
@ -6436,22 +6439,26 @@ retry:
stp->st_access_bmap = 0; stp->st_access_bmap = 0;
stp->st_deny_bmap = open_stp->st_deny_bmap; stp->st_deny_bmap = open_stp->st_deny_bmap;
stp->st_openstp = open_stp; stp->st_openstp = open_stp;
spin_lock(&fp->fi_lock);
list_add(&stp->st_locks, &open_stp->st_locks); list_add(&stp->st_locks, &open_stp->st_locks);
list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids);
list_add(&stp->st_perfile, &fp->fi_stateids); list_add(&stp->st_perfile, &fp->fi_stateids);
out_unlock:
spin_unlock(&fp->fi_lock); spin_unlock(&fp->fi_lock);
spin_unlock(&clp->cl_lock); spin_unlock(&clp->cl_lock);
if (retstp) {
if (nfsd4_lock_ol_stateid(retstp) != nfs_ok) {
nfs4_put_stid(&retstp->st_stid);
goto retry;
}
/* To keep mutex tracking happy */
mutex_unlock(&stp->st_mutex);
stp = retstp;
}
return stp; return stp;
out_found:
spin_unlock(&clp->cl_lock);
if (nfsd4_lock_ol_stateid(retstp) != nfs_ok) {
nfs4_put_stid(&retstp->st_stid);
goto retry;
}
/* To keep mutex tracking happy */
mutex_unlock(&stp->st_mutex);
return retstp;
out_close:
spin_unlock(&clp->cl_lock);
mutex_unlock(&stp->st_mutex);
return NULL;
} }
static struct nfs4_ol_stateid * static struct nfs4_ol_stateid *
@ -6466,7 +6473,7 @@ find_or_create_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fi,
*new = false; *new = false;
spin_lock(&clp->cl_lock); spin_lock(&clp->cl_lock);
lst = find_lock_stateid(lo, fi); lst = find_lock_stateid(lo, ost);
spin_unlock(&clp->cl_lock); spin_unlock(&clp->cl_lock);
if (lst != NULL) { if (lst != NULL) {
if (nfsd4_lock_ol_stateid(lst) == nfs_ok) if (nfsd4_lock_ol_stateid(lst) == nfs_ok)

View File

@ -3591,23 +3591,22 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
__be32 nfserr; __be32 nfserr;
__be32 tmp; __be32 tmp;
__be32 *p; __be32 *p;
u32 zzz = 0;
int pad; int pad;
/*
* svcrdma requires every READ payload to start somewhere
* in xdr->pages.
*/
if (xdr->iov == xdr->buf->head) {
xdr->iov = NULL;
xdr->end = xdr->p;
}
len = maxcount; len = maxcount;
v = 0; v = 0;
thislen = min_t(long, len, ((void *)xdr->end - (void *)xdr->p));
p = xdr_reserve_space(xdr, (thislen+3)&~3);
WARN_ON_ONCE(!p);
resp->rqstp->rq_vec[v].iov_base = p;
resp->rqstp->rq_vec[v].iov_len = thislen;
v++;
len -= thislen;
while (len) { while (len) {
thislen = min_t(long, len, PAGE_SIZE); thislen = min_t(long, len, PAGE_SIZE);
p = xdr_reserve_space(xdr, (thislen+3)&~3); p = xdr_reserve_space(xdr, thislen);
WARN_ON_ONCE(!p); WARN_ON_ONCE(!p);
resp->rqstp->rq_vec[v].iov_base = p; resp->rqstp->rq_vec[v].iov_base = p;
resp->rqstp->rq_vec[v].iov_len = thislen; resp->rqstp->rq_vec[v].iov_len = thislen;
@ -3616,23 +3615,25 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
} }
read->rd_vlen = v; read->rd_vlen = v;
len = maxcount;
nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset, nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
resp->rqstp->rq_vec, read->rd_vlen, &maxcount, resp->rqstp->rq_vec, read->rd_vlen, &maxcount,
&eof); &eof);
read->rd_length = maxcount; read->rd_length = maxcount;
if (nfserr) if (nfserr)
return nfserr; return nfserr;
xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3)); if (svc_encode_read_payload(resp->rqstp, starting_len + 8, maxcount))
return nfserr_io;
xdr_truncate_encode(xdr, starting_len + 8 + xdr_align_size(maxcount));
tmp = htonl(eof); tmp = htonl(eof);
write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4); write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4);
tmp = htonl(maxcount); tmp = htonl(maxcount);
write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4);
tmp = xdr_zero;
pad = (maxcount&3) ? 4 - (maxcount&3) : 0; pad = (maxcount&3) ? 4 - (maxcount&3) : 0;
write_bytes_to_xdr_buf(xdr->buf, starting_len + 8 + maxcount, write_bytes_to_xdr_buf(xdr->buf, starting_len + 8 + maxcount,
&zzz, pad); &tmp, pad);
return 0; return 0;
} }
@ -4005,11 +4006,12 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,
int major_id_sz; int major_id_sz;
int server_scope_sz; int server_scope_sz;
uint64_t minor_id = 0; uint64_t minor_id = 0;
struct nfsd_net *nn = net_generic(SVC_NET(resp->rqstp), nfsd_net_id);
major_id = utsname()->nodename; major_id = nn->nfsd_name;
major_id_sz = strlen(major_id); major_id_sz = strlen(nn->nfsd_name);
server_scope = utsname()->nodename; server_scope = nn->nfsd_name;
server_scope_sz = strlen(server_scope); server_scope_sz = strlen(nn->nfsd_name);
p = xdr_reserve_space(xdr, p = xdr_reserve_space(xdr,
8 /* eir_clientid */ + 8 /* eir_clientid */ +

View File

@ -1333,6 +1333,7 @@ void nfsd_client_rmdir(struct dentry *dentry)
dget(dentry); dget(dentry);
ret = simple_rmdir(dir, dentry); ret = simple_rmdir(dir, dentry);
WARN_ON_ONCE(ret); WARN_ON_ONCE(ret);
fsnotify_rmdir(dir, dentry);
d_delete(dentry); d_delete(dentry);
inode_unlock(dir); inode_unlock(dir);
} }

View File

@ -14,6 +14,7 @@
#include "nfsd.h" #include "nfsd.h"
#include "vfs.h" #include "vfs.h"
#include "auth.h" #include "auth.h"
#include "trace.h"
#define NFSDDBG_FACILITY NFSDDBG_FH #define NFSDDBG_FACILITY NFSDDBG_FH
@ -209,11 +210,14 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
} }
error = nfserr_stale; error = nfserr_stale;
if (PTR_ERR(exp) == -ENOENT) if (IS_ERR(exp)) {
return error; trace_nfsd_set_fh_dentry_badexport(rqstp, fhp, PTR_ERR(exp));
if (PTR_ERR(exp) == -ENOENT)
return error;
if (IS_ERR(exp))
return nfserrno(PTR_ERR(exp)); return nfserrno(PTR_ERR(exp));
}
if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) { if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) {
/* Elevate privileges so that the lack of 'r' or 'x' /* Elevate privileges so that the lack of 'r' or 'x'
@ -267,6 +271,9 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
dentry = exportfs_decode_fh(exp->ex_path.mnt, fid, dentry = exportfs_decode_fh(exp->ex_path.mnt, fid,
data_left, fileid_type, data_left, fileid_type,
nfsd_acceptable, exp); nfsd_acceptable, exp);
if (IS_ERR_OR_NULL(dentry))
trace_nfsd_set_fh_dentry_badhandle(rqstp, fhp,
dentry ? PTR_ERR(dentry) : -ESTALE);
} }
if (dentry == NULL) if (dentry == NULL)
goto out; goto out;

View File

@ -749,6 +749,9 @@ nfsd_svc(int nrservs, struct net *net, const struct cred *cred)
if (nrservs == 0 && nn->nfsd_serv == NULL) if (nrservs == 0 && nn->nfsd_serv == NULL)
goto out; goto out;
strlcpy(nn->nfsd_name, utsname()->nodename,
sizeof(nn->nfsd_name));
error = nfsd_create_serv(net); error = nfsd_create_serv(net);
if (error) if (error)
goto out; goto out;

View File

@ -9,6 +9,7 @@
#define _NFSD_TRACE_H #define _NFSD_TRACE_H
#include <linux/tracepoint.h> #include <linux/tracepoint.h>
#include "export.h"
#include "nfsfh.h" #include "nfsfh.h"
TRACE_EVENT(nfsd_compound, TRACE_EVENT(nfsd_compound,
@ -50,6 +51,127 @@ TRACE_EVENT(nfsd_compound_status,
__get_str(name), __entry->status) __get_str(name), __entry->status)
) )
/*
 * Event class for file-handle errors.
 *
 * Each event records three fields:
 *   xid     - the request's rq_xid, converted with be32_to_cpu()
 *   fh_hash - knfsd_fh_hash() of the svc_fh's fh_handle
 *   status  - the integer status supplied by the caller
 */
DECLARE_EVENT_CLASS(nfsd_fh_err_class,
TP_PROTO(struct svc_rqst *rqstp,
struct svc_fh *fhp,
int status),
TP_ARGS(rqstp, fhp, status),
TP_STRUCT__entry(
__field(u32, xid)
__field(u32, fh_hash)
__field(int, status)
),
TP_fast_assign(
__entry->xid = be32_to_cpu(rqstp->rq_xid);
__entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
__entry->status = status;
),
TP_printk("xid=0x%08x fh_hash=0x%08x status=%d",
__entry->xid, __entry->fh_hash,
__entry->status)
)
/*
 * Define one event of class nfsd_fh_err_class. The event is named
 * nfsd_<name> and takes the same (rqstp, fhp, status) arguments as the
 * class.
 */
#define DEFINE_NFSD_FH_ERR_EVENT(name) \
DEFINE_EVENT(nfsd_fh_err_class, nfsd_##name, \
TP_PROTO(struct svc_rqst *rqstp, \
struct svc_fh *fhp, \
int status), \
TP_ARGS(rqstp, fhp, status))
/* Instantiated events: nfsd_set_fh_dentry_badexport, nfsd_set_fh_dentry_badhandle */
DEFINE_NFSD_FH_ERR_EVENT(set_fh_dentry_badexport);
DEFINE_NFSD_FH_ERR_EVENT(set_fh_dentry_badhandle);
/*
 * Trace an expkey lookup together with its status.
 *
 * Records the key's fsid type, its fsid copied as six u32 words
 * (4*6 bytes), the name of the key's auth domain (ek_client->name),
 * and the caller-supplied status. The output prints the fsid type in
 * hex, then "::", then the fsid array.
 */
TRACE_EVENT(nfsd_exp_find_key,
TP_PROTO(const struct svc_expkey *key,
int status),
TP_ARGS(key, status),
TP_STRUCT__entry(
__field(int, fsidtype)
__array(u32, fsid, 6)
__string(auth_domain, key->ek_client->name)
__field(int, status)
),
TP_fast_assign(
__entry->fsidtype = key->ek_fsidtype;
memcpy(__entry->fsid, key->ek_fsid, 4*6);
__assign_str(auth_domain, key->ek_client->name);
__entry->status = status;
),
TP_printk("fsid=%x::%s domain=%s status=%d",
__entry->fsidtype,
__print_array(__entry->fsid, 6, 4),
__get_str(auth_domain),
__entry->status
)
);
/*
 * Trace an update of an expkey cache entry.
 *
 * Records the key's fsid type, its fsid copied as six u32 words
 * (4*6 bytes), the auth domain name, and the export path string passed
 * in @exp_path. The "cache" field is true when CACHE_NEGATIVE is NOT set
 * in the entry's flags; it is printed as "pos" (true) or "neg" (false).
 * NOTE(review): a caller may pass exp_path as NULL; this relies on
 * __string()/__assign_str() tolerating a NULL source -- confirm.
 */
TRACE_EVENT(nfsd_expkey_update,
TP_PROTO(const struct svc_expkey *key, const char *exp_path),
TP_ARGS(key, exp_path),
TP_STRUCT__entry(
__field(int, fsidtype)
__array(u32, fsid, 6)
__string(auth_domain, key->ek_client->name)
__string(path, exp_path)
__field(bool, cache)
),
TP_fast_assign(
__entry->fsidtype = key->ek_fsidtype;
memcpy(__entry->fsid, key->ek_fsid, 4*6);
__assign_str(auth_domain, key->ek_client->name);
__assign_str(path, exp_path);
__entry->cache = !test_bit(CACHE_NEGATIVE, &key->h.flags);
),
TP_printk("fsid=%x::%s domain=%s path=%s cache=%s",
__entry->fsidtype,
__print_array(__entry->fsid, 6, 4),
__get_str(auth_domain),
__get_str(path),
__entry->cache ? "pos" : "neg"
)
);
/*
 * Trace an export lookup by name together with its status.
 *
 * Records the d_name.name of the export's path dentry, the auth domain
 * name (ex_client->name), and the caller-supplied status.
 * NOTE(review): d_name.name is presumably only the final path component,
 * not the full export path -- confirm before relying on it.
 */
TRACE_EVENT(nfsd_exp_get_by_name,
TP_PROTO(const struct svc_export *key,
int status),
TP_ARGS(key, status),
TP_STRUCT__entry(
__string(path, key->ex_path.dentry->d_name.name)
__string(auth_domain, key->ex_client->name)
__field(int, status)
),
TP_fast_assign(
__assign_str(path, key->ex_path.dentry->d_name.name);
__assign_str(auth_domain, key->ex_client->name);
__entry->status = status;
),
TP_printk("path=%s domain=%s status=%d",
__get_str(path),
__get_str(auth_domain),
__entry->status
)
);
/*
 * Trace an update of an export cache entry.
 *
 * Records the d_name.name of the export's path dentry and the auth
 * domain name (ex_client->name). The "cache" field is true when
 * CACHE_NEGATIVE is NOT set in the entry's flags; it is printed as
 * "pos" (true) or "neg" (false).
 * NOTE(review): d_name.name is presumably only the final path component,
 * not the full export path -- confirm before relying on it.
 */
TRACE_EVENT(nfsd_export_update,
TP_PROTO(const struct svc_export *key),
TP_ARGS(key),
TP_STRUCT__entry(
__string(path, key->ex_path.dentry->d_name.name)
__string(auth_domain, key->ex_client->name)
__field(bool, cache)
),
TP_fast_assign(
__assign_str(path, key->ex_path.dentry->d_name.name);
__assign_str(auth_domain, key->ex_client->name);
__entry->cache = !test_bit(CACHE_NEGATIVE, &key->h.flags);
),
TP_printk("path=%s domain=%s cache=%s",
__get_str(path),
__get_str(auth_domain),
__entry->cache ? "pos" : "neg"
)
);
DECLARE_EVENT_CLASS(nfsd_io_class, DECLARE_EVENT_CLASS(nfsd_io_class,
TP_PROTO(struct svc_rqst *rqstp, TP_PROTO(struct svc_rqst *rqstp,
struct svc_fh *fhp, struct svc_fh *fhp,

View File

@ -179,6 +179,9 @@ sunrpc_cache_update(struct cache_detail *detail,
extern int extern int
sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h); sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h);
extern int
sunrpc_cache_pipe_upcall_timeout(struct cache_detail *detail,
struct cache_head *h);
extern void cache_clean_deferred(void *owner); extern void cache_clean_deferred(void *owner);
@ -206,11 +209,11 @@ static inline void cache_put(struct cache_head *h, struct cache_detail *cd)
static inline bool cache_is_expired(struct cache_detail *detail, struct cache_head *h) static inline bool cache_is_expired(struct cache_detail *detail, struct cache_head *h)
{ {
if (h->expiry_time < seconds_since_boot())
return true;
if (!test_bit(CACHE_VALID, &h->flags)) if (!test_bit(CACHE_VALID, &h->flags))
return false; return false;
return detail->flush_time >= h->last_refresh;
return (h->expiry_time < seconds_since_boot()) ||
(detail->flush_time >= h->last_refresh);
} }
extern int cache_check(struct cache_detail *detail, extern int cache_check(struct cache_detail *detail,

View File

@ -58,7 +58,8 @@ enum {
enum { enum {
rpcrdma_fixed_maxsz = 4, rpcrdma_fixed_maxsz = 4,
rpcrdma_segment_maxsz = 4, rpcrdma_segment_maxsz = 4,
rpcrdma_readchunk_maxsz = 2 + rpcrdma_segment_maxsz, rpcrdma_readseg_maxsz = 1 + rpcrdma_segment_maxsz,
rpcrdma_readchunk_maxsz = 1 + rpcrdma_readseg_maxsz,
}; };
/* /*

View File

@ -380,7 +380,7 @@ struct svc_deferred_req {
struct cache_deferred_req handle; struct cache_deferred_req handle;
size_t xprt_hlen; size_t xprt_hlen;
int argslen; int argslen;
__be32 args[0]; __be32 args[];
}; };
struct svc_process_info { struct svc_process_info {
@ -517,6 +517,9 @@ void svc_wake_up(struct svc_serv *);
void svc_reserve(struct svc_rqst *rqstp, int space); void svc_reserve(struct svc_rqst *rqstp, int space);
struct svc_pool * svc_pool_for_cpu(struct svc_serv *serv, int cpu); struct svc_pool * svc_pool_for_cpu(struct svc_serv *serv, int cpu);
char * svc_print_addr(struct svc_rqst *, char *, size_t); char * svc_print_addr(struct svc_rqst *, char *, size_t);
int svc_encode_read_payload(struct svc_rqst *rqstp,
unsigned int offset,
unsigned int length);
unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, unsigned int svc_fill_write_vector(struct svc_rqst *rqstp,
struct page **pages, struct page **pages,
struct kvec *first, size_t total); struct kvec *first, size_t total);

View File

@ -52,6 +52,7 @@
/* Default and maximum inline threshold sizes */ /* Default and maximum inline threshold sizes */
enum { enum {
RPCRDMA_PULLUP_THRESH = RPCRDMA_V1_DEF_INLINE_SIZE >> 1,
RPCRDMA_DEF_INLINE_THRESH = 4096, RPCRDMA_DEF_INLINE_THRESH = 4096,
RPCRDMA_MAX_INLINE_THRESH = 65536 RPCRDMA_MAX_INLINE_THRESH = 65536
}; };
@ -132,11 +133,16 @@ struct svc_rdma_recv_ctxt {
struct ib_sge rc_recv_sge; struct ib_sge rc_recv_sge;
void *rc_recv_buf; void *rc_recv_buf;
struct xdr_buf rc_arg; struct xdr_buf rc_arg;
struct xdr_stream rc_stream;
bool rc_temp; bool rc_temp;
u32 rc_byte_len; u32 rc_byte_len;
unsigned int rc_page_count; unsigned int rc_page_count;
unsigned int rc_hdr_count; unsigned int rc_hdr_count;
u32 rc_inv_rkey; u32 rc_inv_rkey;
__be32 *rc_write_list;
__be32 *rc_reply_chunk;
unsigned int rc_read_payload_offset;
unsigned int rc_read_payload_length;
struct page *rc_pages[RPCSVC_MAXPAGES]; struct page *rc_pages[RPCSVC_MAXPAGES];
}; };
@ -144,6 +150,8 @@ struct svc_rdma_send_ctxt {
struct list_head sc_list; struct list_head sc_list;
struct ib_send_wr sc_send_wr; struct ib_send_wr sc_send_wr;
struct ib_cqe sc_cqe; struct ib_cqe sc_cqe;
struct xdr_buf sc_hdrbuf;
struct xdr_stream sc_stream;
void *sc_xprt_buf; void *sc_xprt_buf;
int sc_page_count; int sc_page_count;
int sc_cur_sge_no; int sc_cur_sge_no;
@ -170,9 +178,11 @@ extern int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma,
struct svc_rqst *rqstp, struct svc_rqst *rqstp,
struct svc_rdma_recv_ctxt *head, __be32 *p); struct svc_rdma_recv_ctxt *head, __be32 *p);
extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma,
__be32 *wr_ch, struct xdr_buf *xdr); __be32 *wr_ch, struct xdr_buf *xdr,
unsigned int offset,
unsigned long length);
extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma,
__be32 *rp_ch, bool writelist, const struct svc_rdma_recv_ctxt *rctxt,
struct xdr_buf *xdr); struct xdr_buf *xdr);
/* svc_rdma_sendto.c */ /* svc_rdma_sendto.c */
@ -182,13 +192,13 @@ extern struct svc_rdma_send_ctxt *
extern void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma, extern void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *ctxt); struct svc_rdma_send_ctxt *ctxt);
extern int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr); extern int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr);
extern void svc_rdma_sync_reply_hdr(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *ctxt,
unsigned int len);
extern int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, extern int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *ctxt, struct svc_rdma_send_ctxt *sctxt,
struct xdr_buf *xdr, __be32 *wr_lst); const struct svc_rdma_recv_ctxt *rctxt,
struct xdr_buf *xdr);
extern int svc_rdma_sendto(struct svc_rqst *); extern int svc_rdma_sendto(struct svc_rqst *);
extern int svc_rdma_read_payload(struct svc_rqst *rqstp, unsigned int offset,
unsigned int length);
/* svc_rdma_transport.c */ /* svc_rdma_transport.c */
extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *); extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);

View File

@ -21,6 +21,8 @@ struct svc_xprt_ops {
int (*xpo_has_wspace)(struct svc_xprt *); int (*xpo_has_wspace)(struct svc_xprt *);
int (*xpo_recvfrom)(struct svc_rqst *); int (*xpo_recvfrom)(struct svc_rqst *);
int (*xpo_sendto)(struct svc_rqst *); int (*xpo_sendto)(struct svc_rqst *);
int (*xpo_read_payload)(struct svc_rqst *, unsigned int,
unsigned int);
void (*xpo_release_rqst)(struct svc_rqst *); void (*xpo_release_rqst)(struct svc_rqst *);
void (*xpo_detach)(struct svc_xprt *); void (*xpo_detach)(struct svc_xprt *);
void (*xpo_free)(struct svc_xprt *); void (*xpo_free)(struct svc_xprt *);

View File

@ -188,20 +188,6 @@ extern int xdr_buf_read_mic(struct xdr_buf *, struct xdr_netobj *, unsigned int)
extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
/*
* Helper structure for copying from an sk_buff.
*/
struct xdr_skb_reader {
struct sk_buff *skb;
unsigned int offset;
size_t count;
__wsum csum;
};
typedef size_t (*xdr_skb_read_actor)(struct xdr_skb_reader *desc, void *to, size_t len);
extern int csum_partial_copy_to_xdr(struct xdr_buf *, struct sk_buff *);
extern int xdr_encode_word(struct xdr_buf *, unsigned int, u32); extern int xdr_encode_word(struct xdr_buf *, unsigned int, u32);
extern int xdr_decode_word(struct xdr_buf *, unsigned int, u32 *); extern int xdr_decode_word(struct xdr_buf *, unsigned int, u32 *);
@ -300,6 +286,59 @@ xdr_align_size(size_t n)
return (n + mask) & ~mask; return (n + mask) & ~mask;
} }
/**
 * xdr_pad_size - Compute the number of pad bytes an object needs
 * @n: Length, in bytes, of the object being XDR encoded
 *
 * The pad is the distance from @n up to the aligned size reported by
 * xdr_align_size(), so neither a conditional branch nor a modulo
 * division is needed here.
 *
 * Return value:
 *   Size (in bytes) of the XDR pad that follows the object
 */
static inline size_t xdr_pad_size(size_t n)
{
	const size_t aligned = xdr_align_size(n);

	return aligned - n;
}
/**
* xdr_stream_encode_item_present - Encode a "present" list item
* @xdr: pointer to xdr_stream
*
* Return values:
* On success, returns length in bytes of XDR buffer consumed
* %-EMSGSIZE on XDR buffer overflow
*/
static inline ssize_t xdr_stream_encode_item_present(struct xdr_stream *xdr)
{
const size_t len = sizeof(__be32);
__be32 *p = xdr_reserve_space(xdr, len);
if (unlikely(!p))
return -EMSGSIZE;
*p = xdr_one;
return len;
}
/**
* xdr_stream_encode_item_absent - Encode a "not present" list item
* @xdr: pointer to xdr_stream
*
* Return values:
* On success, returns length in bytes of XDR buffer consumed
* %-EMSGSIZE on XDR buffer overflow
*/
static inline int xdr_stream_encode_item_absent(struct xdr_stream *xdr)
{
const size_t len = sizeof(__be32);
__be32 *p = xdr_reserve_space(xdr, len);
if (unlikely(!p))
return -EMSGSIZE;
*p = xdr_zero;
return len;
}
/** /**
* xdr_stream_encode_u32 - Encode a 32-bit integer * xdr_stream_encode_u32 - Encode a 32-bit integer
* @xdr: pointer to xdr_stream * @xdr: pointer to xdr_stream

View File

@ -126,7 +126,7 @@ DEFINE_GSSAPI_EVENT(verify_mic);
DEFINE_GSSAPI_EVENT(wrap); DEFINE_GSSAPI_EVENT(wrap);
DEFINE_GSSAPI_EVENT(unwrap); DEFINE_GSSAPI_EVENT(unwrap);
TRACE_EVENT(rpcgss_accept_upcall, TRACE_EVENT(rpcgss_svc_accept_upcall,
TP_PROTO( TP_PROTO(
__be32 xid, __be32 xid,
u32 major_status, u32 major_status,
@ -154,6 +154,29 @@ TRACE_EVENT(rpcgss_accept_upcall,
) )
); );
/*
 * rpcgss_svc_accept - trace point taking an XID and a length
 *
 * The XID arrives in big-endian form and is stored in CPU byte order;
 * the length is stored unchanged. Output format: "xid=0x%08x len=%zu".
 */
TRACE_EVENT(rpcgss_svc_accept,
TP_PROTO(
__be32 xid,
size_t len
),
TP_ARGS(xid, len),
TP_STRUCT__entry(
__field(u32, xid)
__field(size_t, len)
),
TP_fast_assign(
__entry->xid = be32_to_cpu(xid);
__entry->len = len;
),
TP_printk("xid=0x%08x len=%zu",
__entry->xid, __entry->len
)
);
/** /**
** GSS auth unwrap failures ** GSS auth unwrap failures
@ -268,6 +291,40 @@ TRACE_EVENT(rpcgss_need_reencode,
__entry->ret ? "" : "un") __entry->ret ? "" : "un")
); );
/*
 * rpcgss_svc_seqno_class - event class recording an XID and a GSS
 * sequence number.
 *
 * The XID is converted from big-endian to CPU byte order before it is
 * stored. Every event of this class prints
 * "xid=0x%08x seqno=%u, request discarded".
 */
DECLARE_EVENT_CLASS(rpcgss_svc_seqno_class,
TP_PROTO(
__be32 xid,
u32 seqno
),
TP_ARGS(xid, seqno),
TP_STRUCT__entry(
__field(u32, xid)
__field(u32, seqno)
),
TP_fast_assign(
__entry->xid = be32_to_cpu(xid);
__entry->seqno = seqno;
),
TP_printk("xid=0x%08x seqno=%u, request discarded",
__entry->xid, __entry->seqno)
);
/*
 * DEFINE_SVC_SEQNO_EVENT(name) instantiates the class above as an event
 * named rpcgss_svc_<name>.
 */
#define DEFINE_SVC_SEQNO_EVENT(name) \
DEFINE_EVENT(rpcgss_svc_seqno_class, rpcgss_svc_##name, \
TP_PROTO( \
__be32 xid, \
u32 seqno \
), \
TP_ARGS(xid, seqno))
/* Events: rpcgss_svc_large_seqno and rpcgss_svc_old_seqno */
DEFINE_SVC_SEQNO_EVENT(large_seqno);
DEFINE_SVC_SEQNO_EVENT(old_seqno);
/** /**
** gssd upcall related trace events ** gssd upcall related trace events
**/ **/

View File

@ -1469,7 +1469,7 @@ DECLARE_EVENT_CLASS(svcrdma_segment_event,
); );
#define DEFINE_SEGMENT_EVENT(name) \ #define DEFINE_SEGMENT_EVENT(name) \
DEFINE_EVENT(svcrdma_segment_event, svcrdma_encode_##name,\ DEFINE_EVENT(svcrdma_segment_event, svcrdma_##name,\
TP_PROTO( \ TP_PROTO( \
u32 handle, \ u32 handle, \
u32 length, \ u32 length, \
@ -1477,8 +1477,11 @@ DECLARE_EVENT_CLASS(svcrdma_segment_event,
), \ ), \
TP_ARGS(handle, length, offset)) TP_ARGS(handle, length, offset))
DEFINE_SEGMENT_EVENT(rseg); DEFINE_SEGMENT_EVENT(decode_wseg);
DEFINE_SEGMENT_EVENT(wseg); DEFINE_SEGMENT_EVENT(encode_rseg);
DEFINE_SEGMENT_EVENT(send_rseg);
DEFINE_SEGMENT_EVENT(encode_wseg);
DEFINE_SEGMENT_EVENT(send_wseg);
DECLARE_EVENT_CLASS(svcrdma_chunk_event, DECLARE_EVENT_CLASS(svcrdma_chunk_event,
TP_PROTO( TP_PROTO(
@ -1501,17 +1504,19 @@ DECLARE_EVENT_CLASS(svcrdma_chunk_event,
); );
#define DEFINE_CHUNK_EVENT(name) \ #define DEFINE_CHUNK_EVENT(name) \
DEFINE_EVENT(svcrdma_chunk_event, svcrdma_encode_##name,\ DEFINE_EVENT(svcrdma_chunk_event, svcrdma_##name, \
TP_PROTO( \ TP_PROTO( \
u32 length \ u32 length \
), \ ), \
TP_ARGS(length)) TP_ARGS(length))
DEFINE_CHUNK_EVENT(pzr); DEFINE_CHUNK_EVENT(send_pzr);
DEFINE_CHUNK_EVENT(write); DEFINE_CHUNK_EVENT(encode_write_chunk);
DEFINE_CHUNK_EVENT(reply); DEFINE_CHUNK_EVENT(send_write_chunk);
DEFINE_CHUNK_EVENT(encode_read_chunk);
DEFINE_CHUNK_EVENT(send_reply_chunk);
TRACE_EVENT(svcrdma_encode_read, TRACE_EVENT(svcrdma_send_read_chunk,
TP_PROTO( TP_PROTO(
u32 length, u32 length,
u32 position u32 position
@ -1634,6 +1639,24 @@ TRACE_EVENT(svcrdma_dma_map_rwctx,
) )
); );
/*
 * svcrdma_send_pullup - trace point recording a single length value,
 * printed as "len=%u".
 */
TRACE_EVENT(svcrdma_send_pullup,
TP_PROTO(
unsigned int len
),
TP_ARGS(len),
TP_STRUCT__entry(
__field(unsigned int, len)
),
TP_fast_assign(
__entry->len = len;
),
TP_printk("len=%u", __entry->len)
);
TRACE_EVENT(svcrdma_send_failed, TRACE_EVENT(svcrdma_send_failed,
TP_PROTO( TP_PROTO(
const struct svc_rqst *rqst, const struct svc_rqst *rqst,
@ -1813,34 +1836,6 @@ TRACE_EVENT(svcrdma_post_rw,
DEFINE_SENDCOMP_EVENT(read); DEFINE_SENDCOMP_EVENT(read);
DEFINE_SENDCOMP_EVENT(write); DEFINE_SENDCOMP_EVENT(write);
TRACE_EVENT(svcrdma_cm_event,
TP_PROTO(
const struct rdma_cm_event *event,
const struct sockaddr *sap
),
TP_ARGS(event, sap),
TP_STRUCT__entry(
__field(unsigned int, event)
__field(int, status)
__array(__u8, addr, INET6_ADDRSTRLEN + 10)
),
TP_fast_assign(
__entry->event = event->event;
__entry->status = event->status;
snprintf(__entry->addr, sizeof(__entry->addr) - 1,
"%pISpc", sap);
),
TP_printk("addr=%s event=%s (%u/%d)",
__entry->addr,
rdma_show_cm_event(__entry->event),
__entry->event, __entry->status
)
);
TRACE_EVENT(svcrdma_qp_error, TRACE_EVENT(svcrdma_qp_error,
TP_PROTO( TP_PROTO(
const struct ib_event *event, const struct ib_event *event,

View File

@ -14,6 +14,49 @@
#include <linux/net.h> #include <linux/net.h>
#include <linux/tracepoint.h> #include <linux/tracepoint.h>
/*
 * xdr_buf_class - event class that snapshots the layout of an xdr_buf.
 *
 * Captures the base address and length of head[0] and tail[0], the
 * page_len field, and the overall len field. Only the two base pointers
 * are recorded for the kvecs; the buffer contents are not copied.
 */
DECLARE_EVENT_CLASS(xdr_buf_class,
TP_PROTO(
const struct xdr_buf *xdr
),
TP_ARGS(xdr),
TP_STRUCT__entry(
__field(const void *, head_base)
__field(size_t, head_len)
__field(const void *, tail_base)
__field(size_t, tail_len)
__field(unsigned int, page_len)
__field(unsigned int, msg_len)
),
TP_fast_assign(
__entry->head_base = xdr->head[0].iov_base;
__entry->head_len = xdr->head[0].iov_len;
__entry->tail_base = xdr->tail[0].iov_base;
__entry->tail_len = xdr->tail[0].iov_len;
__entry->page_len = xdr->page_len;
__entry->msg_len = xdr->len;
),
TP_printk("head=[%p,%zu] page=%u tail=[%p,%zu] len=%u",
__entry->head_base, __entry->head_len, __entry->page_len,
__entry->tail_base, __entry->tail_len, __entry->msg_len
)
);
/*
 * DEFINE_XDRBUF_EVENT(name) instantiates xdr_buf_class as an event
 * called @name (no prefix is added).
 */
#define DEFINE_XDRBUF_EVENT(name) \
DEFINE_EVENT(xdr_buf_class, name, \
TP_PROTO( \
const struct xdr_buf *xdr \
), \
TP_ARGS(xdr))
DEFINE_XDRBUF_EVENT(xprt_sendto);
DEFINE_XDRBUF_EVENT(xprt_recvfrom);
DEFINE_XDRBUF_EVENT(svc_recvfrom);
DEFINE_XDRBUF_EVENT(svc_sendto);
TRACE_DEFINE_ENUM(RPC_AUTH_OK); TRACE_DEFINE_ENUM(RPC_AUTH_OK);
TRACE_DEFINE_ENUM(RPC_AUTH_BADCRED); TRACE_DEFINE_ENUM(RPC_AUTH_BADCRED);
TRACE_DEFINE_ENUM(RPC_AUTH_REJECTEDCRED); TRACE_DEFINE_ENUM(RPC_AUTH_REJECTEDCRED);
@ -1292,6 +1335,39 @@ DECLARE_EVENT_CLASS(svc_deferred_event,
DEFINE_SVC_DEFERRED_EVENT(drop); DEFINE_SVC_DEFERRED_EVENT(drop);
DEFINE_SVC_DEFERRED_EVENT(revisit); DEFINE_SVC_DEFERRED_EVENT(revisit);
/*
 * cache_event - event class for events about a single cache entry.
 *
 * Records the cache's name (copied as a string from cd->name) and the
 * address of the cache_head; prints "cache=%s entry=%p".
 */
DECLARE_EVENT_CLASS(cache_event,
TP_PROTO(
const struct cache_detail *cd,
const struct cache_head *h
),
TP_ARGS(cd, h),
TP_STRUCT__entry(
__field(const struct cache_head *, h)
__string(name, cd->name)
),
TP_fast_assign(
__entry->h = h;
__assign_str(name, cd->name);
),
TP_printk("cache=%s entry=%p", __get_str(name), __entry->h)
);
/*
 * DEFINE_CACHE_EVENT(name) instantiates cache_event as an event called
 * @name (no prefix is added).
 */
#define DEFINE_CACHE_EVENT(name) \
DEFINE_EVENT(cache_event, name, \
TP_PROTO( \
const struct cache_detail *cd, \
const struct cache_head *h \
), \
TP_ARGS(cd, h))
DEFINE_CACHE_EVENT(cache_entry_expired);
DEFINE_CACHE_EVENT(cache_entry_upcall);
DEFINE_CACHE_EVENT(cache_entry_update);
DEFINE_CACHE_EVENT(cache_entry_make_negative);
DEFINE_CACHE_EVENT(cache_entry_no_listener);
#endif /* _TRACE_SUNRPC_H */ #endif /* _TRACE_SUNRPC_H */
#include <trace/define_trace.h> #include <trace/define_trace.h>

View File

@ -1877,7 +1877,7 @@ static int gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
else else
iov = snd_buf->head; iov = snd_buf->head;
p = iov->iov_base + iov->iov_len; p = iov->iov_base + iov->iov_len;
pad = 3 - ((snd_buf->len - offset - 1) & 3); pad = xdr_pad_size(snd_buf->len - offset);
memset(p, 0, pad); memset(p, 0, pad);
iov->iov_len += pad; iov->iov_len += pad;
snd_buf->len += pad; snd_buf->len += pad;

View File

@ -55,10 +55,6 @@
#include "gss_rpc_upcall.h" #include "gss_rpc_upcall.h"
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
/* The rpcsec_init cache is used for mapping RPCSEC_GSS_{,CONT_}INIT requests /* The rpcsec_init cache is used for mapping RPCSEC_GSS_{,CONT_}INIT requests
* into replies. * into replies.
* *
@ -184,6 +180,11 @@ static struct cache_head *rsi_alloc(void)
return NULL; return NULL;
} }
/*
 * Upcall hook for the rpcsec_init cache: forwards the entry to
 * sunrpc_cache_pipe_upcall_timeout() and reports its result unchanged.
 */
static int rsi_upcall(struct cache_detail *cd, struct cache_head *h)
{
	int status;

	status = sunrpc_cache_pipe_upcall_timeout(cd, h);
	return status;
}
static void rsi_request(struct cache_detail *cd, static void rsi_request(struct cache_detail *cd,
struct cache_head *h, struct cache_head *h,
char **bpp, int *blen) char **bpp, int *blen)
@ -282,6 +283,7 @@ static const struct cache_detail rsi_cache_template = {
.hash_size = RSI_HASHMAX, .hash_size = RSI_HASHMAX,
.name = "auth.rpcsec.init", .name = "auth.rpcsec.init",
.cache_put = rsi_put, .cache_put = rsi_put,
.cache_upcall = rsi_upcall,
.cache_request = rsi_request, .cache_request = rsi_request,
.cache_parse = rsi_parse, .cache_parse = rsi_parse,
.match = rsi_match, .match = rsi_match,
@ -428,6 +430,11 @@ rsc_alloc(void)
return NULL; return NULL;
} }
/*
 * Upcall hook for the rpcsec_context cache. No upcall is made: every
 * call reports -EINVAL, and neither argument is examined.
 */
static int rsc_upcall(struct cache_detail *cd, struct cache_head *h)
{
	const int status = -EINVAL;

	return status;
}
static int rsc_parse(struct cache_detail *cd, static int rsc_parse(struct cache_detail *cd,
char *mesg, int mlen) char *mesg, int mlen)
{ {
@ -554,6 +561,7 @@ static const struct cache_detail rsc_cache_template = {
.hash_size = RSC_HASHMAX, .hash_size = RSC_HASHMAX,
.name = "auth.rpcsec.context", .name = "auth.rpcsec.context",
.cache_put = rsc_put, .cache_put = rsc_put,
.cache_upcall = rsc_upcall,
.cache_parse = rsc_parse, .cache_parse = rsc_parse,
.match = rsc_match, .match = rsc_match,
.init = rsc_init, .init = rsc_init,
@ -713,14 +721,12 @@ gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci,
} }
if (gc->gc_seq > MAXSEQ) { if (gc->gc_seq > MAXSEQ) {
dprintk("RPC: svcauth_gss: discarding request with " trace_rpcgss_svc_large_seqno(rqstp->rq_xid, gc->gc_seq);
"large sequence number %d\n", gc->gc_seq);
*authp = rpcsec_gsserr_ctxproblem; *authp = rpcsec_gsserr_ctxproblem;
return SVC_DENIED; return SVC_DENIED;
} }
if (!gss_check_seq_num(rsci, gc->gc_seq)) { if (!gss_check_seq_num(rsci, gc->gc_seq)) {
dprintk("RPC: svcauth_gss: discarding request with " trace_rpcgss_svc_old_seqno(rqstp->rq_xid, gc->gc_seq);
"old sequence number %d\n", gc->gc_seq);
return SVC_DROP; return SVC_DROP;
} }
return SVC_OK; return SVC_OK;
@ -961,7 +967,7 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs
/* XXX: This is very inefficient. It would be better to either do /* XXX: This is very inefficient. It would be better to either do
* this while we encrypt, or maybe in the receive code, if we can peak * this while we encrypt, or maybe in the receive code, if we can peak
* ahead and work out the service and mechanism there. */ * ahead and work out the service and mechanism there. */
offset = buf->head[0].iov_len % 4; offset = xdr_pad_size(buf->head[0].iov_len);
if (offset) { if (offset) {
buf->buflen = RPCSVC_MAXPAYLOAD; buf->buflen = RPCSVC_MAXPAYLOAD;
xdr_shift_buf(buf, offset); xdr_shift_buf(buf, offset);
@ -1245,7 +1251,6 @@ static int gss_proxy_save_rsc(struct cache_detail *cd,
if (!ud->found_creds) { if (!ud->found_creds) {
/* userspace seem buggy, we should always get at least a /* userspace seem buggy, we should always get at least a
* mapping to nobody */ * mapping to nobody */
dprintk("RPC: No creds found!\n");
goto out; goto out;
} else { } else {
struct timespec64 boot; struct timespec64 boot;
@ -1311,8 +1316,8 @@ static int svcauth_gss_proxy_init(struct svc_rqst *rqstp,
if (status) if (status)
goto out; goto out;
trace_rpcgss_accept_upcall(rqstp->rq_xid, ud.major_status, trace_rpcgss_svc_accept_upcall(rqstp->rq_xid, ud.major_status,
ud.minor_status); ud.minor_status);
switch (ud.major_status) { switch (ud.major_status) {
case GSS_S_CONTINUE_NEEDED: case GSS_S_CONTINUE_NEEDED:
@ -1320,31 +1325,23 @@ static int svcauth_gss_proxy_init(struct svc_rqst *rqstp,
break; break;
case GSS_S_COMPLETE: case GSS_S_COMPLETE:
status = gss_proxy_save_rsc(sn->rsc_cache, &ud, &handle); status = gss_proxy_save_rsc(sn->rsc_cache, &ud, &handle);
if (status) { if (status)
pr_info("%s: gss_proxy_save_rsc failed (%d)\n",
__func__, status);
goto out; goto out;
}
cli_handle.data = (u8 *)&handle; cli_handle.data = (u8 *)&handle;
cli_handle.len = sizeof(handle); cli_handle.len = sizeof(handle);
break; break;
default: default:
ret = SVC_CLOSE;
goto out; goto out;
} }
/* Got an answer to the upcall; use it: */ /* Got an answer to the upcall; use it: */
if (gss_write_init_verf(sn->rsc_cache, rqstp, if (gss_write_init_verf(sn->rsc_cache, rqstp,
&cli_handle, &ud.major_status)) { &cli_handle, &ud.major_status))
pr_info("%s: gss_write_init_verf failed\n", __func__);
goto out; goto out;
}
if (gss_write_resv(resv, PAGE_SIZE, if (gss_write_resv(resv, PAGE_SIZE,
&cli_handle, &ud.out_token, &cli_handle, &ud.out_token,
ud.major_status, ud.minor_status)) { ud.major_status, ud.minor_status))
pr_info("%s: gss_write_resv failed\n", __func__);
goto out; goto out;
}
ret = SVC_COMPLETE; ret = SVC_COMPLETE;
out: out:
@ -1495,8 +1492,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
int ret; int ret;
struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id); struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id);
dprintk("RPC: svcauth_gss: argv->iov_len = %zd\n", trace_rpcgss_svc_accept(rqstp->rq_xid, argv->iov_len);
argv->iov_len);
*authp = rpc_autherr_badcred; *authp = rpc_autherr_badcred;
if (!svcdata) if (!svcdata)
@ -1680,7 +1676,8 @@ svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp)
goto out; goto out;
integ_offset = (u8 *)(p + 1) - (u8 *)resbuf->head[0].iov_base; integ_offset = (u8 *)(p + 1) - (u8 *)resbuf->head[0].iov_base;
integ_len = resbuf->len - integ_offset; integ_len = resbuf->len - integ_offset;
BUG_ON(integ_len % 4); if (integ_len & 3)
goto out;
*p++ = htonl(integ_len); *p++ = htonl(integ_len);
*p++ = htonl(gc->gc_seq); *p++ = htonl(gc->gc_seq);
if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, integ_len)) { if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, integ_len)) {
@ -1704,7 +1701,8 @@ svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp)
resv->iov_len += XDR_QUADLEN(mic.len) << 2; resv->iov_len += XDR_QUADLEN(mic.len) << 2;
/* not strictly required: */ /* not strictly required: */
resbuf->len += XDR_QUADLEN(mic.len) << 2; resbuf->len += XDR_QUADLEN(mic.len) << 2;
BUG_ON(resv->iov_len > PAGE_SIZE); if (resv->iov_len > PAGE_SIZE)
goto out_err;
out: out:
stat = 0; stat = 0;
out_err: out_err:
@ -1740,9 +1738,11 @@ svcauth_gss_wrap_resp_priv(struct svc_rqst *rqstp)
* both the head and tail. * both the head and tail.
*/ */
if (resbuf->tail[0].iov_base) { if (resbuf->tail[0].iov_base) {
BUG_ON(resbuf->tail[0].iov_base >= resbuf->head[0].iov_base if (resbuf->tail[0].iov_base >=
+ PAGE_SIZE); resbuf->head[0].iov_base + PAGE_SIZE)
BUG_ON(resbuf->tail[0].iov_base < resbuf->head[0].iov_base); return -EINVAL;
if (resbuf->tail[0].iov_base < resbuf->head[0].iov_base)
return -EINVAL;
if (resbuf->tail[0].iov_len + resbuf->head[0].iov_len if (resbuf->tail[0].iov_len + resbuf->head[0].iov_len
+ 2 * RPC_MAX_AUTH_SIZE > PAGE_SIZE) + 2 * RPC_MAX_AUTH_SIZE > PAGE_SIZE)
return -ENOMEM; return -ENOMEM;

View File

@ -32,13 +32,13 @@
#include <linux/sunrpc/cache.h> #include <linux/sunrpc/cache.h>
#include <linux/sunrpc/stats.h> #include <linux/sunrpc/stats.h>
#include <linux/sunrpc/rpc_pipe_fs.h> #include <linux/sunrpc/rpc_pipe_fs.h>
#include <trace/events/sunrpc.h>
#include "netns.h" #include "netns.h"
#define RPCDBG_FACILITY RPCDBG_CACHE #define RPCDBG_FACILITY RPCDBG_CACHE
static bool cache_defer_req(struct cache_req *req, struct cache_head *item); static bool cache_defer_req(struct cache_req *req, struct cache_head *item);
static void cache_revisit_request(struct cache_head *item); static void cache_revisit_request(struct cache_head *item);
static bool cache_listeners_exist(struct cache_detail *detail);
static void cache_init(struct cache_head *h, struct cache_detail *detail) static void cache_init(struct cache_head *h, struct cache_detail *detail)
{ {
@ -65,13 +65,14 @@ static struct cache_head *sunrpc_cache_find_rcu(struct cache_detail *detail,
rcu_read_lock(); rcu_read_lock();
hlist_for_each_entry_rcu(tmp, head, cache_list) { hlist_for_each_entry_rcu(tmp, head, cache_list) {
if (detail->match(tmp, key)) { if (!detail->match(tmp, key))
if (cache_is_expired(detail, tmp)) continue;
continue; if (test_bit(CACHE_VALID, &tmp->flags) &&
tmp = cache_get_rcu(tmp); cache_is_expired(detail, tmp))
rcu_read_unlock(); continue;
return tmp; tmp = cache_get_rcu(tmp);
} rcu_read_unlock();
return tmp;
} }
rcu_read_unlock(); rcu_read_unlock();
return NULL; return NULL;
@ -113,18 +114,21 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail,
spin_lock(&detail->hash_lock); spin_lock(&detail->hash_lock);
/* check if entry appeared while we slept */ /* check if entry appeared while we slept */
hlist_for_each_entry_rcu(tmp, head, cache_list) { hlist_for_each_entry_rcu(tmp, head, cache_list,
if (detail->match(tmp, key)) { lockdep_is_held(&detail->hash_lock)) {
if (cache_is_expired(detail, tmp)) { if (!detail->match(tmp, key))
sunrpc_begin_cache_remove_entry(tmp, detail); continue;
freeme = tmp; if (test_bit(CACHE_VALID, &tmp->flags) &&
break; cache_is_expired(detail, tmp)) {
} sunrpc_begin_cache_remove_entry(tmp, detail);
cache_get(tmp); trace_cache_entry_expired(detail, tmp);
spin_unlock(&detail->hash_lock); freeme = tmp;
cache_put(new, detail); break;
return tmp;
} }
cache_get(tmp);
spin_unlock(&detail->hash_lock);
cache_put(new, detail);
return tmp;
} }
hlist_add_head_rcu(&new->cache_list, head); hlist_add_head_rcu(&new->cache_list, head);
@ -174,6 +178,25 @@ static void cache_fresh_unlocked(struct cache_head *head,
} }
} }
/*
 * Mark cache entry @h as negative by setting CACHE_NEGATIVE in its
 * flags, then emit the cache_entry_make_negative trace event for it.
 */
static void cache_make_negative(struct cache_detail *detail,
struct cache_head *h)
{
set_bit(CACHE_NEGATIVE, &h->flags);
trace_cache_entry_make_negative(detail, h);
}
/*
 * Bring entry @h in line with @new. When @new carries CACHE_NEGATIVE,
 * @h is simply made negative; otherwise the cache's ->update method
 * copies @new into @h and the cache_entry_update trace event fires.
 */
static void cache_entry_update(struct cache_detail *detail,
			       struct cache_head *h,
			       struct cache_head *new)
{
	if (test_bit(CACHE_NEGATIVE, &new->flags)) {
		cache_make_negative(detail, h);
		return;
	}

	detail->update(h, new);
	trace_cache_entry_update(detail, h);
}
struct cache_head *sunrpc_cache_update(struct cache_detail *detail, struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
struct cache_head *new, struct cache_head *old, int hash) struct cache_head *new, struct cache_head *old, int hash)
{ {
@ -186,10 +209,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
if (!test_bit(CACHE_VALID, &old->flags)) { if (!test_bit(CACHE_VALID, &old->flags)) {
spin_lock(&detail->hash_lock); spin_lock(&detail->hash_lock);
if (!test_bit(CACHE_VALID, &old->flags)) { if (!test_bit(CACHE_VALID, &old->flags)) {
if (test_bit(CACHE_NEGATIVE, &new->flags)) cache_entry_update(detail, old, new);
set_bit(CACHE_NEGATIVE, &old->flags);
else
detail->update(old, new);
cache_fresh_locked(old, new->expiry_time, detail); cache_fresh_locked(old, new->expiry_time, detail);
spin_unlock(&detail->hash_lock); spin_unlock(&detail->hash_lock);
cache_fresh_unlocked(old, detail); cache_fresh_unlocked(old, detail);
@ -207,10 +227,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
detail->init(tmp, old); detail->init(tmp, old);
spin_lock(&detail->hash_lock); spin_lock(&detail->hash_lock);
if (test_bit(CACHE_NEGATIVE, &new->flags)) cache_entry_update(detail, tmp, new);
set_bit(CACHE_NEGATIVE, &tmp->flags);
else
detail->update(tmp, new);
hlist_add_head(&tmp->cache_list, &detail->hash_table[hash]); hlist_add_head(&tmp->cache_list, &detail->hash_table[hash]);
detail->entries++; detail->entries++;
cache_get(tmp); cache_get(tmp);
@ -224,13 +241,6 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
} }
EXPORT_SYMBOL_GPL(sunrpc_cache_update); EXPORT_SYMBOL_GPL(sunrpc_cache_update);
static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h)
{
if (cd->cache_upcall)
return cd->cache_upcall(cd, h);
return sunrpc_cache_pipe_upcall(cd, h);
}
static inline int cache_is_valid(struct cache_head *h) static inline int cache_is_valid(struct cache_head *h)
{ {
if (!test_bit(CACHE_VALID, &h->flags)) if (!test_bit(CACHE_VALID, &h->flags))
@ -259,7 +269,7 @@ static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h
spin_lock(&detail->hash_lock); spin_lock(&detail->hash_lock);
rv = cache_is_valid(h); rv = cache_is_valid(h);
if (rv == -EAGAIN) { if (rv == -EAGAIN) {
set_bit(CACHE_NEGATIVE, &h->flags); cache_make_negative(detail, h);
cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY, cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY,
detail); detail);
rv = -ENOENT; rv = -ENOENT;
@ -303,17 +313,14 @@ int cache_check(struct cache_detail *detail,
(h->expiry_time != 0 && age > refresh_age/2)) { (h->expiry_time != 0 && age > refresh_age/2)) {
dprintk("RPC: Want update, refage=%lld, age=%lld\n", dprintk("RPC: Want update, refage=%lld, age=%lld\n",
refresh_age, age); refresh_age, age);
if (!test_and_set_bit(CACHE_PENDING, &h->flags)) { switch (detail->cache_upcall(detail, h)) {
switch (cache_make_upcall(detail, h)) { case -EINVAL:
case -EINVAL:
rv = try_to_negate_entry(detail, h);
break;
case -EAGAIN:
cache_fresh_unlocked(h, detail);
break;
}
} else if (!cache_listeners_exist(detail))
rv = try_to_negate_entry(detail, h); rv = try_to_negate_entry(detail, h);
break;
case -EAGAIN:
cache_fresh_unlocked(h, detail);
break;
}
} }
if (rv == -EAGAIN) { if (rv == -EAGAIN) {
@ -468,6 +475,7 @@ static int cache_clean(void)
continue; continue;
sunrpc_begin_cache_remove_entry(ch, current_detail); sunrpc_begin_cache_remove_entry(ch, current_detail);
trace_cache_entry_expired(current_detail, ch);
rv = 1; rv = 1;
break; break;
} }
@ -1195,20 +1203,12 @@ static bool cache_listeners_exist(struct cache_detail *detail)
* *
* Each request is at most one page long. * Each request is at most one page long.
*/ */
int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h) static int cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h)
{ {
char *buf; char *buf;
struct cache_request *crq; struct cache_request *crq;
int ret = 0; int ret = 0;
if (!detail->cache_request)
return -EINVAL;
if (!cache_listeners_exist(detail)) {
warn_no_listener(detail);
return -EINVAL;
}
if (test_bit(CACHE_CLEANED, &h->flags)) if (test_bit(CACHE_CLEANED, &h->flags))
/* Too late to make an upcall */ /* Too late to make an upcall */
return -EAGAIN; return -EAGAIN;
@ -1231,6 +1231,7 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h)
if (test_bit(CACHE_PENDING, &h->flags)) { if (test_bit(CACHE_PENDING, &h->flags)) {
crq->item = cache_get(h); crq->item = cache_get(h);
list_add_tail(&crq->q.list, &detail->queue); list_add_tail(&crq->q.list, &detail->queue);
trace_cache_entry_upcall(detail, h);
} else } else
/* Lost a race, no longer PENDING, so don't enqueue */ /* Lost a race, no longer PENDING, so don't enqueue */
ret = -EAGAIN; ret = -EAGAIN;
@ -1242,8 +1243,27 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h)
} }
return ret; return ret;
} }
/*
 * Queue an upcall for @h unless one is already pending. Whoever flips
 * CACHE_PENDING from clear to set performs the pipe upcall; a caller
 * that finds the bit already set gets 0.
 */
int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h)
{
	if (!test_and_set_bit(CACHE_PENDING, &h->flags))
		return cache_pipe_upcall(detail, h);

	return 0;
}
EXPORT_SYMBOL_GPL(sunrpc_cache_pipe_upcall); EXPORT_SYMBOL_GPL(sunrpc_cache_pipe_upcall);
/*
 * Like sunrpc_cache_pipe_upcall(), but first checks for a listener.
 * With no listener, a warning and the cache_entry_no_listener trace
 * event are emitted and -EINVAL is returned without queueing anything.
 */
int sunrpc_cache_pipe_upcall_timeout(struct cache_detail *detail,
				     struct cache_head *h)
{
	if (cache_listeners_exist(detail))
		return sunrpc_cache_pipe_upcall(detail, h);

	warn_no_listener(detail);
	trace_cache_entry_no_listener(detail, h);
	return -EINVAL;
}
EXPORT_SYMBOL_GPL(sunrpc_cache_pipe_upcall_timeout);
/* /*
* parse a message from user-space and pass it * parse a message from user-space and pass it
* to an appropriate cache * to an appropriate cache

View File

@ -2509,6 +2509,7 @@ call_decode(struct rpc_task *task)
goto out; goto out;
req->rq_rcv_buf.len = req->rq_private_buf.len; req->rq_rcv_buf.len = req->rq_private_buf.len;
trace_xprt_recvfrom(&req->rq_rcv_buf);
/* Check that the softirq receive buffer is valid */ /* Check that the softirq receive buffer is valid */
WARN_ON(memcmp(&req->rq_rcv_buf, &req->rq_private_buf, WARN_ON(memcmp(&req->rq_rcv_buf, &req->rq_private_buf,

View File

@ -14,9 +14,24 @@
#include <linux/types.h> #include <linux/types.h>
#include <linux/pagemap.h> #include <linux/pagemap.h>
#include <linux/udp.h> #include <linux/udp.h>
#include <linux/sunrpc/msg_prot.h>
#include <linux/sunrpc/xdr.h> #include <linux/sunrpc/xdr.h>
#include <linux/export.h> #include <linux/export.h>
#include "socklib.h"
/*
 * Helper structure for copying from an sk_buff.
 *
 * NOTE(review): the per-field notes below are inferred from the field
 * names and types only; confirm against the read actors that use them.
 */
struct xdr_skb_reader {
struct sk_buff *skb;	/* source sk_buff */
unsigned int offset;	/* presumably the current read position in @skb */
size_t count;	/* presumably the number of bytes still to copy */
__wsum csum;	/* presumably a running checksum of copied data */
};
/*
 * Signature of a read actor: takes the reader state @desc, a destination
 * buffer @to and a length @len, and returns a size_t.
 */
typedef size_t (*xdr_skb_read_actor)(struct xdr_skb_reader *desc, void *to,
size_t len);
/** /**
* xdr_skb_read_bits - copy some data bits from skb to internal buffer * xdr_skb_read_bits - copy some data bits from skb to internal buffer
@ -186,3 +201,129 @@ no_checksum:
return 0; return 0;
} }
EXPORT_SYMBOL_GPL(csum_partial_copy_to_xdr); EXPORT_SYMBOL_GPL(csum_partial_copy_to_xdr);
/*
 * Send @msg on @sock, first skipping @seek bytes of the message's
 * iterator when @seek is non-zero. Returns whatever sock_sendmsg()
 * returns.
 */
static inline int xprt_sendmsg(struct socket *sock, struct msghdr *msg,
			       size_t seek)
{
	if (seek != 0)
		iov_iter_advance(&msg->msg_iter, seek);

	return sock_sendmsg(sock, msg);
}
/*
 * Point @msg's iterator at the single kvec @vec and send it, starting
 * @seek bytes into the kvec.
 */
static int xprt_send_kvec(struct socket *sock, struct msghdr *msg,
			  struct kvec *vec, size_t seek)
{
	const size_t nbytes = vec->iov_len;

	iov_iter_kvec(&msg->msg_iter, WRITE, vec, 1, nbytes);

	return xprt_sendmsg(sock, msg, seek);
}
/*
 * Send the page portion of @xdr, starting @base bytes into the page
 * data. The bvec array is allocated with GFP_KERNEL first; a failure
 * there is returned to the caller without sending anything.
 */
static int xprt_send_pagedata(struct socket *sock, struct msghdr *msg,
			      struct xdr_buf *xdr, size_t base)
{
	int rc = xdr_alloc_bvec(xdr, GFP_KERNEL);

	if (rc < 0)
		return rc;

	/* The iterator spans page_base too, so the seek must skip over it. */
	iov_iter_bvec(&msg->msg_iter, WRITE, xdr->bvec,
		      xdr_buf_pagecount(xdr),
		      xdr->page_len + xdr->page_base);

	return xprt_sendmsg(sock, msg, base + xdr->page_base);
}
/*
 * Send the record marker and @vec as one two-element kvec message,
 * starting @base bytes in.
 *
 * This serves the common case:
 *  - stream transport
 *  - sending from byte 0 of the message
 *  - the message is wholly contained in @xdr's head iovec
 */
static int xprt_send_rm_and_kvec(struct socket *sock, struct msghdr *msg,
				 rpc_fraghdr marker, struct kvec *vec,
				 size_t base)
{
	struct kvec segs[2] = {
		[0] = {
			.iov_base	= &marker,
			.iov_len	= sizeof(marker)
		},
		[1] = *vec,
	};
	const size_t total = segs[0].iov_len + segs[1].iov_len;

	iov_iter_kvec(&msg->msg_iter, WRITE, segs, 2, total);

	return xprt_sendmsg(sock, msg, base);
}
/**
* xprt_sock_sendmsg - write an xdr_buf directly to a socket
* @sock: open socket to send on
* @msg: socket message metadata
* @xdr: xdr_buf containing this request
* @base: starting position in the buffer
* @marker: stream record marker field
* @sent_p: return the total number of bytes successfully queued for sending
*
* The head (preceded by the record marker when @marker is non-zero),
* the page data and the tail are sent in that order, each with its own
* send call. @base counts from the start of the marker when there is
* one. MSG_MORE is set in @msg->msg_flags on entry and cleared just
* before the send that covers the last remaining bytes. Sending stops
* at the first send that returns anything other than the full length
* requested.
*
* Return values:
* On success, returns zero and fills in @sent_p. A short send also
* returns zero; @sent_p then holds fewer bytes than remained.
* %-ENOTSOCK if @sock is not a struct socket.
* Zero or negative values returned by a send helper are passed
* through unchanged.
*/
int xprt_sock_sendmsg(struct socket *sock, struct msghdr *msg,
struct xdr_buf *xdr, unsigned int base,
rpc_fraghdr marker, unsigned int *sent_p)
{
/* A zero marker means no record marker is sent at all. */
unsigned int rmsize = marker ? sizeof(marker) : 0;
/* Bytes still to be sent, counting the marker. */
unsigned int remainder = rmsize + xdr->len - base;
unsigned int want;
int err = 0;
/* Zeroed before the socket check, so it is valid on every return. */
*sent_p = 0;
if (unlikely(!sock))
return -ENOTSOCK;
msg->msg_flags |= MSG_MORE;
/* Section 1: record marker (if any) plus head. */
want = xdr->head[0].iov_len + rmsize;
if (base < want) {
unsigned int len = want - base;
remainder -= len;
if (remainder == 0)
msg->msg_flags &= ~MSG_MORE;
if (rmsize)
err = xprt_send_rm_and_kvec(sock, msg, marker,
&xdr->head[0], base);
else
err = xprt_send_kvec(sock, msg, &xdr->head[0], base);
/* Done, short send, or error: let the out label sort it out. */
if (remainder == 0 || err != len)
goto out;
*sent_p += err;
base = 0;
} else {
/* Starting point lies beyond this section; rebase for the next. */
base -= want;
}
/* Section 2: page data. */
if (base < xdr->page_len) {
unsigned int len = xdr->page_len - base;
remainder -= len;
if (remainder == 0)
msg->msg_flags &= ~MSG_MORE;
err = xprt_send_pagedata(sock, msg, xdr, base);
if (remainder == 0 || err != len)
goto out;
*sent_p += err;
base = 0;
} else {
base -= xdr->page_len;
}
/* Section 3: tail. Nothing left to send if @base is at or past its end. */
if (base >= xdr->tail[0].iov_len)
return 0;
msg->msg_flags &= ~MSG_MORE;
err = xprt_send_kvec(sock, msg, &xdr->tail[0], base);
out:
/* A positive result is a byte count: account for it and report success. */
if (err > 0) {
*sent_p += err;
err = 0;
}
return err;
}

15
net/sunrpc/socklib.h Normal file
View File

@ -0,0 +1,15 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de>
* Copyright (C) 2020, Oracle.
*/
#ifndef _NET_SUNRPC_SOCKLIB_H_
#define _NET_SUNRPC_SOCKLIB_H_
/*
 * This header includes nothing itself: the includer must already have
 * pulled in the declaration of rpc_fraghdr (and the struct types named
 * below) before including it.
 */
/* Copy from an sk_buff into an xdr_buf. */
int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb);
/*
 * Write @xdr to @sock starting at @base, optionally preceded by the
 * record marker @marker; the byte count queued is returned via @sent_p.
 */
int xprt_sock_sendmsg(struct socket *sock, struct msghdr *msg,
struct xdr_buf *xdr, unsigned int base,
rpc_fraghdr marker, unsigned int *sent_p);
#endif /* _NET_SUNRPC_SOCKLIB_H_ */

View File

@ -50,10 +50,6 @@ static inline int sock_is_loopback(struct sock *sk)
return loopback; return loopback;
} }
int svc_send_common(struct socket *sock, struct xdr_buf *xdr,
struct page *headpage, unsigned long headoffset,
struct page *tailpage, unsigned long tailoffset);
int rpc_clients_notifier_register(void); int rpc_clients_notifier_register(void);
void rpc_clients_notifier_unregister(void); void rpc_clients_notifier_unregister(void);
#endif /* _NET_SUNRPC_SUNRPC_H */ #endif /* _NET_SUNRPC_SUNRPC_H */

View File

@ -1529,10 +1529,6 @@ svc_process(struct svc_rqst *rqstp)
goto out_drop; goto out_drop;
} }
/* Reserve space for the record marker */
if (rqstp->rq_prot == IPPROTO_TCP)
svc_putnl(resv, 0);
/* Returns 1 for send, 0 for drop */ /* Returns 1 for send, 0 for drop */
if (likely(svc_process_common(rqstp, argv, resv))) if (likely(svc_process_common(rqstp, argv, resv)))
return svc_send(rqstp); return svc_send(rqstp);
@ -1636,6 +1632,22 @@ u32 svc_max_payload(const struct svc_rqst *rqstp)
} }
EXPORT_SYMBOL_GPL(svc_max_payload); EXPORT_SYMBOL_GPL(svc_max_payload);
/**
* svc_encode_read_payload - mark a range of bytes as a READ payload
* @rqstp: svc_rqst to operate on
* @offset: payload's byte offset in rqstp->rq_res
* @length: size of payload, in bytes
*
* Returns zero on success, or a negative errno if a permanent
* error occurred.
*/
int svc_encode_read_payload(struct svc_rqst *rqstp, unsigned int offset,
unsigned int length)
{
return rqstp->rq_xprt->xpt_ops->xpo_read_payload(rqstp, offset, length);
}
EXPORT_SYMBOL_GPL(svc_encode_read_payload);
/** /**
* svc_fill_write_vector - Construct data argument for VFS write call * svc_fill_write_vector - Construct data argument for VFS write call
* @rqstp: svc_rqst to operate on * @rqstp: svc_rqst to operate on

View File

@ -104,8 +104,17 @@ void svc_unreg_xprt_class(struct svc_xprt_class *xcl)
} }
EXPORT_SYMBOL_GPL(svc_unreg_xprt_class); EXPORT_SYMBOL_GPL(svc_unreg_xprt_class);
/* /**
* Format the transport list for printing * svc_print_xprts - Format the transport list for printing
* @buf: target buffer for formatted address
* @maxlen: length of target buffer
*
* Fills in @buf with a string containing a list of transport names, each name
* terminated with '\n'. If the buffer is too small, some entries may be
* missing, but it is guaranteed that all lines in the output buffer are
* complete.
*
* Returns positive length of the filled-in string.
*/ */
int svc_print_xprts(char *buf, int maxlen) int svc_print_xprts(char *buf, int maxlen)
{ {
@ -118,9 +127,9 @@ int svc_print_xprts(char *buf, int maxlen)
list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) { list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) {
int slen; int slen;
sprintf(tmpstr, "%s %d\n", xcl->xcl_name, xcl->xcl_max_payload); slen = snprintf(tmpstr, sizeof(tmpstr), "%s %d\n",
slen = strlen(tmpstr); xcl->xcl_name, xcl->xcl_max_payload);
if (len + slen > maxlen) if (slen >= sizeof(tmpstr) || len + slen >= maxlen)
break; break;
len += slen; len += slen;
strcat(buf, tmpstr); strcat(buf, tmpstr);
@ -802,6 +811,8 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
len = svc_deferred_recv(rqstp); len = svc_deferred_recv(rqstp);
else else
len = xprt->xpt_ops->xpo_recvfrom(rqstp); len = xprt->xpt_ops->xpo_recvfrom(rqstp);
if (len > 0)
trace_svc_recvfrom(&rqstp->rq_arg);
rqstp->rq_stime = ktime_get(); rqstp->rq_stime = ktime_get();
rqstp->rq_reserved = serv->sv_max_mesg; rqstp->rq_reserved = serv->sv_max_mesg;
atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
@ -905,6 +916,7 @@ int svc_send(struct svc_rqst *rqstp)
xb->len = xb->head[0].iov_len + xb->len = xb->head[0].iov_len +
xb->page_len + xb->page_len +
xb->tail[0].iov_len; xb->tail[0].iov_len;
trace_svc_sendto(xb);
/* Grab mutex to serialize outgoing data. */ /* Grab mutex to serialize outgoing data. */
mutex_lock(&xprt->xpt_mutex); mutex_lock(&xprt->xpt_mutex);

View File

@ -148,6 +148,11 @@ static struct cache_head *ip_map_alloc(void)
return NULL; return NULL;
} }
/*
 * .cache_upcall method for the "auth.unix.ip" cache (see
 * ip_map_cache_template). Forwards the request for @h in cache @cd to
 * sunrpc_cache_pipe_upcall() and returns that function's result unchanged.
 */
static int ip_map_upcall(struct cache_detail *cd, struct cache_head *h)
{
return sunrpc_cache_pipe_upcall(cd, h);
}
static void ip_map_request(struct cache_detail *cd, static void ip_map_request(struct cache_detail *cd,
struct cache_head *h, struct cache_head *h,
char **bpp, int *blen) char **bpp, int *blen)
@ -467,6 +472,11 @@ static struct cache_head *unix_gid_alloc(void)
return NULL; return NULL;
} }
/*
 * .cache_upcall method for the "auth.unix.gid" cache (see
 * unix_gid_cache_template). Forwards the request for @h in cache @cd to
 * sunrpc_cache_pipe_upcall_timeout() and returns its result unchanged.
 * NOTE(review): this uses the _timeout variant of the pipe upcall helper;
 * how it differs from sunrpc_cache_pipe_upcall() is defined by that helper
 * and is not visible here -- confirm before changing which one is called.
 */
static int unix_gid_upcall(struct cache_detail *cd, struct cache_head *h)
{
return sunrpc_cache_pipe_upcall_timeout(cd, h);
}
static void unix_gid_request(struct cache_detail *cd, static void unix_gid_request(struct cache_detail *cd,
struct cache_head *h, struct cache_head *h,
char **bpp, int *blen) char **bpp, int *blen)
@ -584,6 +594,7 @@ static const struct cache_detail unix_gid_cache_template = {
.hash_size = GID_HASHMAX, .hash_size = GID_HASHMAX,
.name = "auth.unix.gid", .name = "auth.unix.gid",
.cache_put = unix_gid_put, .cache_put = unix_gid_put,
.cache_upcall = unix_gid_upcall,
.cache_request = unix_gid_request, .cache_request = unix_gid_request,
.cache_parse = unix_gid_parse, .cache_parse = unix_gid_parse,
.cache_show = unix_gid_show, .cache_show = unix_gid_show,
@ -881,6 +892,7 @@ static const struct cache_detail ip_map_cache_template = {
.hash_size = IP_HASHMAX, .hash_size = IP_HASHMAX,
.name = "auth.unix.ip", .name = "auth.unix.ip",
.cache_put = ip_map_put, .cache_put = ip_map_put,
.cache_upcall = ip_map_upcall,
.cache_request = ip_map_request, .cache_request = ip_map_request,
.cache_parse = ip_map_parse, .cache_parse = ip_map_parse,
.cache_show = ip_map_show, .cache_show = ip_map_show,

View File

@ -55,6 +55,7 @@
#include <linux/sunrpc/stats.h> #include <linux/sunrpc/stats.h>
#include <linux/sunrpc/xprt.h> #include <linux/sunrpc/xprt.h>
#include "socklib.h"
#include "sunrpc.h" #include "sunrpc.h"
#define RPCDBG_FACILITY RPCDBG_SVCXPRT #define RPCDBG_FACILITY RPCDBG_SVCXPRT
@ -174,109 +175,10 @@ static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
} }
} }
/* static int svc_sock_read_payload(struct svc_rqst *rqstp, unsigned int offset,
* send routine intended to be shared by the fore- and back-channel unsigned int length)
*/
int svc_send_common(struct socket *sock, struct xdr_buf *xdr,
struct page *headpage, unsigned long headoffset,
struct page *tailpage, unsigned long tailoffset)
{ {
int result; return 0;
int size;
struct page **ppage = xdr->pages;
size_t base = xdr->page_base;
unsigned int pglen = xdr->page_len;
unsigned int flags = MSG_MORE | MSG_SENDPAGE_NOTLAST;
int slen;
int len = 0;
slen = xdr->len;
/* send head */
if (slen == xdr->head[0].iov_len)
flags = 0;
len = kernel_sendpage(sock, headpage, headoffset,
xdr->head[0].iov_len, flags);
if (len != xdr->head[0].iov_len)
goto out;
slen -= xdr->head[0].iov_len;
if (slen == 0)
goto out;
/* send page data */
size = PAGE_SIZE - base < pglen ? PAGE_SIZE - base : pglen;
while (pglen > 0) {
if (slen == size)
flags = 0;
result = kernel_sendpage(sock, *ppage, base, size, flags);
if (result > 0)
len += result;
if (result != size)
goto out;
slen -= size;
pglen -= size;
size = PAGE_SIZE < pglen ? PAGE_SIZE : pglen;
base = 0;
ppage++;
}
/* send tail */
if (xdr->tail[0].iov_len) {
result = kernel_sendpage(sock, tailpage, tailoffset,
xdr->tail[0].iov_len, 0);
if (result > 0)
len += result;
}
out:
return len;
}
/*
* Generic sendto routine
*/
static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
{
struct svc_sock *svsk =
container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
struct socket *sock = svsk->sk_sock;
union {
struct cmsghdr hdr;
long all[SVC_PKTINFO_SPACE / sizeof(long)];
} buffer;
struct cmsghdr *cmh = &buffer.hdr;
int len = 0;
unsigned long tailoff;
unsigned long headoff;
RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
if (rqstp->rq_prot == IPPROTO_UDP) {
struct msghdr msg = {
.msg_name = &rqstp->rq_addr,
.msg_namelen = rqstp->rq_addrlen,
.msg_control = cmh,
.msg_controllen = sizeof(buffer),
.msg_flags = MSG_MORE,
};
svc_set_cmsg_data(rqstp, cmh);
if (sock_sendmsg(sock, &msg) < 0)
goto out;
}
tailoff = ((unsigned long)xdr->tail[0].iov_base) & (PAGE_SIZE-1);
headoff = 0;
len = svc_send_common(sock, xdr, rqstp->rq_respages[0], headoff,
rqstp->rq_respages[0], tailoff);
out:
dprintk("svc: socket %p sendto([%p %zu... ], %d) = %d (addr %s)\n",
svsk, xdr->head[0].iov_base, xdr->head[0].iov_len,
xdr->len, len, svc_print_addr(rqstp, buf, sizeof(buf)));
return len;
} }
/* /*
@ -600,17 +502,43 @@ out_free:
return 0; return 0;
} }
static int /**
svc_udp_sendto(struct svc_rqst *rqstp) * svc_udp_sendto - Send out a reply on a UDP socket
* @rqstp: completed svc_rqst
*
* Returns the number of bytes sent, or a negative errno.
*/
static int svc_udp_sendto(struct svc_rqst *rqstp)
{ {
int error; struct svc_xprt *xprt = rqstp->rq_xprt;
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
struct xdr_buf *xdr = &rqstp->rq_res;
union {
struct cmsghdr hdr;
long all[SVC_PKTINFO_SPACE / sizeof(long)];
} buffer;
struct cmsghdr *cmh = &buffer.hdr;
struct msghdr msg = {
.msg_name = &rqstp->rq_addr,
.msg_namelen = rqstp->rq_addrlen,
.msg_control = cmh,
.msg_controllen = sizeof(buffer),
};
unsigned int uninitialized_var(sent);
int err;
error = svc_sendto(rqstp, &rqstp->rq_res); svc_set_cmsg_data(rqstp, cmh);
if (error == -ECONNREFUSED)
err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent);
xdr_free_bvec(xdr);
if (err == -ECONNREFUSED) {
/* ICMP error on earlier request. */ /* ICMP error on earlier request. */
error = svc_sendto(rqstp, &rqstp->rq_res); err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent);
xdr_free_bvec(xdr);
return error; }
if (err < 0)
return err;
return sent;
} }
static int svc_udp_has_wspace(struct svc_xprt *xprt) static int svc_udp_has_wspace(struct svc_xprt *xprt)
@ -653,6 +581,7 @@ static const struct svc_xprt_ops svc_udp_ops = {
.xpo_create = svc_udp_create, .xpo_create = svc_udp_create,
.xpo_recvfrom = svc_udp_recvfrom, .xpo_recvfrom = svc_udp_recvfrom,
.xpo_sendto = svc_udp_sendto, .xpo_sendto = svc_udp_sendto,
.xpo_read_payload = svc_sock_read_payload,
.xpo_release_rqst = svc_release_udp_skb, .xpo_release_rqst = svc_release_udp_skb,
.xpo_detach = svc_sock_detach, .xpo_detach = svc_sock_detach,
.xpo_free = svc_sock_free, .xpo_free = svc_sock_free,
@ -1128,35 +1057,39 @@ err_noclose:
return 0; /* record not complete */ return 0; /* record not complete */
} }
/* /**
* Send out data on TCP socket. * svc_tcp_sendto - Send out a reply on a TCP socket
* @rqstp: completed svc_rqst
*
* Returns the number of bytes sent, or a negative errno.
*/ */
static int svc_tcp_sendto(struct svc_rqst *rqstp) static int svc_tcp_sendto(struct svc_rqst *rqstp)
{ {
struct xdr_buf *xbufp = &rqstp->rq_res; struct svc_xprt *xprt = rqstp->rq_xprt;
int sent; struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
__be32 reclen; struct xdr_buf *xdr = &rqstp->rq_res;
rpc_fraghdr marker = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT |
(u32)xdr->len);
struct msghdr msg = {
.msg_flags = 0,
};
unsigned int uninitialized_var(sent);
int err;
/* Set up the first element of the reply kvec. err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, marker, &sent);
* Any other kvecs that may be in use have been taken xdr_free_bvec(xdr);
* care of by the server implementation itself. if (err < 0 || sent != (xdr->len + sizeof(marker)))
*/ goto out_close;
reclen = htonl(0x80000000|((xbufp->len ) - 4));
memcpy(xbufp->head[0].iov_base, &reclen, 4);
sent = svc_sendto(rqstp, &rqstp->rq_res);
if (sent != xbufp->len) {
printk(KERN_NOTICE
"rpc-srv/tcp: %s: %s %d when sending %d bytes "
"- shutting down socket\n",
rqstp->rq_xprt->xpt_server->sv_name,
(sent<0)?"got error":"sent only",
sent, xbufp->len);
set_bit(XPT_CLOSE, &rqstp->rq_xprt->xpt_flags);
svc_xprt_enqueue(rqstp->rq_xprt);
sent = -EAGAIN;
}
return sent; return sent;
out_close:
pr_notice("rpc-srv/tcp: %s: %s %d when sending %d bytes - shutting down socket\n",
xprt->xpt_server->sv_name,
(err < 0) ? "got error" : "sent",
(err < 0) ? err : sent, xdr->len);
set_bit(XPT_CLOSE, &xprt->xpt_flags);
svc_xprt_enqueue(xprt);
return -EAGAIN;
} }
static struct svc_xprt *svc_tcp_create(struct svc_serv *serv, static struct svc_xprt *svc_tcp_create(struct svc_serv *serv,
@ -1171,6 +1104,7 @@ static const struct svc_xprt_ops svc_tcp_ops = {
.xpo_create = svc_tcp_create, .xpo_create = svc_tcp_create,
.xpo_recvfrom = svc_tcp_recvfrom, .xpo_recvfrom = svc_tcp_recvfrom,
.xpo_sendto = svc_tcp_sendto, .xpo_sendto = svc_tcp_sendto,
.xpo_read_payload = svc_sock_read_payload,
.xpo_release_rqst = svc_release_skb, .xpo_release_rqst = svc_release_skb,
.xpo_detach = svc_tcp_sock_detach, .xpo_detach = svc_tcp_sock_detach,
.xpo_free = svc_sock_free, .xpo_free = svc_sock_free,

View File

@ -1117,8 +1117,6 @@ void xprt_complete_rqst(struct rpc_task *task, int copied)
struct rpc_rqst *req = task->tk_rqstp; struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt; struct rpc_xprt *xprt = req->rq_xprt;
dprintk("RPC: %5u xid %08x complete (%d bytes received)\n",
task->tk_pid, ntohl(req->rq_xid), copied);
trace_xprt_complete_rqst(xprt, req->rq_xid, copied); trace_xprt_complete_rqst(xprt, req->rq_xid, copied);
xprt->stat.recvs++; xprt->stat.recvs++;
@ -1462,6 +1460,7 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
*/ */
req->rq_ntrans++; req->rq_ntrans++;
trace_xprt_sendto(&req->rq_snd_buf);
connect_cookie = xprt->connect_cookie; connect_cookie = xprt->connect_cookie;
status = xprt->ops->send_request(req); status = xprt->ops->send_request(req);
if (status != 0) { if (status != 0) {

View File

@ -275,32 +275,6 @@ out:
return n; return n;
} }
static inline int
encode_item_present(struct xdr_stream *xdr)
{
__be32 *p;
p = xdr_reserve_space(xdr, sizeof(*p));
if (unlikely(!p))
return -EMSGSIZE;
*p = xdr_one;
return 0;
}
static inline int
encode_item_not_present(struct xdr_stream *xdr)
{
__be32 *p;
p = xdr_reserve_space(xdr, sizeof(*p));
if (unlikely(!p))
return -EMSGSIZE;
*p = xdr_zero;
return 0;
}
static void static void
xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mr *mr) xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mr *mr)
{ {
@ -414,7 +388,7 @@ static int rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt,
} while (nsegs); } while (nsegs);
done: done:
return encode_item_not_present(xdr); return xdr_stream_encode_item_absent(xdr);
} }
/* Register and XDR encode the Write list. Supports encoding a list /* Register and XDR encode the Write list. Supports encoding a list
@ -453,7 +427,7 @@ static int rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt,
if (nsegs < 0) if (nsegs < 0)
return nsegs; return nsegs;
if (encode_item_present(xdr) < 0) if (xdr_stream_encode_item_present(xdr) < 0)
return -EMSGSIZE; return -EMSGSIZE;
segcount = xdr_reserve_space(xdr, sizeof(*segcount)); segcount = xdr_reserve_space(xdr, sizeof(*segcount));
if (unlikely(!segcount)) if (unlikely(!segcount))
@ -480,7 +454,7 @@ static int rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt,
*segcount = cpu_to_be32(nchunks); *segcount = cpu_to_be32(nchunks);
done: done:
return encode_item_not_present(xdr); return xdr_stream_encode_item_absent(xdr);
} }
/* Register and XDR encode the Reply chunk. Supports encoding an array /* Register and XDR encode the Reply chunk. Supports encoding an array
@ -507,14 +481,14 @@ static int rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt,
__be32 *segcount; __be32 *segcount;
if (wtype != rpcrdma_replych) if (wtype != rpcrdma_replych)
return encode_item_not_present(xdr); return xdr_stream_encode_item_absent(xdr);
seg = req->rl_segments; seg = req->rl_segments;
nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg); nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg);
if (nsegs < 0) if (nsegs < 0)
return nsegs; return nsegs;
if (encode_item_present(xdr) < 0) if (xdr_stream_encode_item_present(xdr) < 0)
return -EMSGSIZE; return -EMSGSIZE;
segcount = xdr_reserve_space(xdr, sizeof(*segcount)); segcount = xdr_reserve_space(xdr, sizeof(*segcount));
if (unlikely(!segcount)) if (unlikely(!segcount))

View File

@ -117,7 +117,7 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
{ {
int ret; int ret;
ret = svc_rdma_map_reply_msg(rdma, ctxt, &rqst->rq_snd_buf, NULL); ret = svc_rdma_map_reply_msg(rdma, ctxt, NULL, &rqst->rq_snd_buf);
if (ret < 0) if (ret < 0)
return -EIO; return -EIO;
@ -181,7 +181,9 @@ rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst)
if (!ctxt) if (!ctxt)
goto drop_connection; goto drop_connection;
p = ctxt->sc_xprt_buf; p = xdr_reserve_space(&ctxt->sc_stream, RPCRDMA_HDRLEN_MIN);
if (!p)
goto put_ctxt;
*p++ = rqst->rq_xid; *p++ = rqst->rq_xid;
*p++ = rpcrdma_version; *p++ = rpcrdma_version;
*p++ = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests); *p++ = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests);
@ -189,7 +191,6 @@ rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst)
*p++ = xdr_zero; *p++ = xdr_zero;
*p++ = xdr_zero; *p++ = xdr_zero;
*p = xdr_zero; *p = xdr_zero;
svc_rdma_sync_reply_hdr(rdma, ctxt, RPCRDMA_HDRLEN_MIN);
#ifdef SVCRDMA_BACKCHANNEL_DEBUG #ifdef SVCRDMA_BACKCHANNEL_DEBUG
pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer); pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer);
@ -197,12 +198,13 @@ rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst)
rqst->rq_xtime = ktime_get(); rqst->rq_xtime = ktime_get();
rc = svc_rdma_bc_sendto(rdma, rqst, ctxt); rc = svc_rdma_bc_sendto(rdma, rqst, ctxt);
if (rc) { if (rc)
svc_rdma_send_ctxt_put(rdma, ctxt); goto put_ctxt;
goto drop_connection;
}
return 0; return 0;
put_ctxt:
svc_rdma_send_ctxt_put(rdma, ctxt);
drop_connection: drop_connection:
dprintk("svcrdma: failed to send bc call\n"); dprintk("svcrdma: failed to send bc call\n");
return -ENOTCONN; return -ENOTCONN;
@ -250,6 +252,7 @@ xprt_rdma_bc_put(struct rpc_xprt *xprt)
{ {
dprintk("svcrdma: %s: xprt %p\n", __func__, xprt); dprintk("svcrdma: %s: xprt %p\n", __func__, xprt);
xprt_rdma_free_addresses(xprt);
xprt_free(xprt); xprt_free(xprt);
} }

View File

@ -193,6 +193,7 @@ svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma)
out: out:
ctxt->rc_page_count = 0; ctxt->rc_page_count = 0;
ctxt->rc_read_payload_length = 0;
return ctxt; return ctxt;
out_empty: out_empty:
@ -357,15 +358,14 @@ static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp,
arg->len = ctxt->rc_byte_len; arg->len = ctxt->rc_byte_len;
} }
/* This accommodates the largest possible Write chunk, /* This accommodates the largest possible Write chunk.
* in one segment.
*/ */
#define MAX_BYTES_WRITE_SEG ((u32)(RPCSVC_MAXPAGES << PAGE_SHIFT)) #define MAX_BYTES_WRITE_CHUNK ((u32)(RPCSVC_MAXPAGES << PAGE_SHIFT))
/* This accommodates the largest possible Position-Zero /* This accommodates the largest possible Position-Zero
* Read chunk or Reply chunk, in one segment. * Read chunk or Reply chunk.
*/ */
#define MAX_BYTES_SPECIAL_SEG ((u32)((RPCSVC_MAXPAGES + 2) << PAGE_SHIFT)) #define MAX_BYTES_SPECIAL_CHUNK ((u32)((RPCSVC_MAXPAGES + 2) << PAGE_SHIFT))
/* Sanity check the Read list. /* Sanity check the Read list.
* *
@ -373,7 +373,7 @@ static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp,
* - This implementation supports only one Read chunk. * - This implementation supports only one Read chunk.
* *
* Sanity checks: * Sanity checks:
* - Read list does not overflow buffer. * - Read list does not overflow Receive buffer.
* - Segment size limited by largest NFS data payload. * - Segment size limited by largest NFS data payload.
* *
* The segment count is limited to how many segments can * The segment count is limited to how many segments can
@ -381,30 +381,44 @@ static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp,
* buffer. That's about 40 Read segments for a 1KB inline * buffer. That's about 40 Read segments for a 1KB inline
* threshold. * threshold.
* *
* Returns pointer to the following Write list. * Return values:
* %true: Read list is valid. @rctxt's xdr_stream is updated
* to point to the first byte past the Read list.
* %false: Read list is corrupt. @rctxt's xdr_stream is left
* in an unknown state.
*/ */
static __be32 *xdr_check_read_list(__be32 *p, const __be32 *end) static bool xdr_check_read_list(struct svc_rdma_recv_ctxt *rctxt)
{ {
u32 position; u32 position, len;
bool first; bool first;
__be32 *p;
p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
if (!p)
return false;
len = 0;
first = true; first = true;
while (*p++ != xdr_zero) { while (*p != xdr_zero) {
if (first) { p = xdr_inline_decode(&rctxt->rc_stream,
position = be32_to_cpup(p++); rpcrdma_readseg_maxsz * sizeof(*p));
first = false; if (!p)
} else if (be32_to_cpup(p++) != position) { return false;
return NULL;
}
p++; /* handle */
if (be32_to_cpup(p++) > MAX_BYTES_SPECIAL_SEG)
return NULL;
p += 2; /* offset */
if (p > end) if (first) {
return NULL; position = be32_to_cpup(p);
first = false;
} else if (be32_to_cpup(p) != position) {
return false;
}
p += 2;
len += be32_to_cpup(p);
p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
if (!p)
return false;
} }
return p; return len <= MAX_BYTES_SPECIAL_CHUNK;
} }
/* The segment count is limited to how many segments can /* The segment count is limited to how many segments can
@ -412,67 +426,100 @@ static __be32 *xdr_check_read_list(__be32 *p, const __be32 *end)
* buffer. That's about 60 Write segments for a 1KB inline * buffer. That's about 60 Write segments for a 1KB inline
* threshold. * threshold.
*/ */
static __be32 *xdr_check_write_chunk(__be32 *p, const __be32 *end, static bool xdr_check_write_chunk(struct svc_rdma_recv_ctxt *rctxt, u32 maxlen)
u32 maxlen)
{ {
u32 i, segcount; u32 i, segcount, total;
__be32 *p;
segcount = be32_to_cpup(p++); p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
if (!p)
return false;
segcount = be32_to_cpup(p);
total = 0;
for (i = 0; i < segcount; i++) { for (i = 0; i < segcount; i++) {
p++; /* handle */ u32 handle, length;
if (be32_to_cpup(p++) > maxlen) u64 offset;
return NULL;
p += 2; /* offset */
if (p > end) p = xdr_inline_decode(&rctxt->rc_stream,
return NULL; rpcrdma_segment_maxsz * sizeof(*p));
if (!p)
return false;
handle = be32_to_cpup(p++);
length = be32_to_cpup(p++);
xdr_decode_hyper(p, &offset);
trace_svcrdma_decode_wseg(handle, length, offset);
total += length;
} }
return total <= maxlen;
return p;
} }
/* Sanity check the Write list. /* Sanity check the Write list.
* *
* Implementation limits: * Implementation limits:
* - This implementation supports only one Write chunk. * - This implementation currently supports only one Write chunk.
* *
* Sanity checks: * Sanity checks:
* - Write list does not overflow buffer. * - Write list does not overflow Receive buffer.
* - Segment size limited by largest NFS data payload. * - Chunk size limited by largest NFS data payload.
* *
* Returns pointer to the following Reply chunk. * Return values:
* %true: Write list is valid. @rctxt's xdr_stream is updated
* to point to the first byte past the Write list.
* %false: Write list is corrupt. @rctxt's xdr_stream is left
* in an unknown state.
*/ */
static __be32 *xdr_check_write_list(__be32 *p, const __be32 *end) static bool xdr_check_write_list(struct svc_rdma_recv_ctxt *rctxt)
{ {
u32 chcount; u32 chcount = 0;
__be32 *p;
chcount = 0; p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
while (*p++ != xdr_zero) { if (!p)
p = xdr_check_write_chunk(p, end, MAX_BYTES_WRITE_SEG); return false;
rctxt->rc_write_list = p;
while (*p != xdr_zero) {
if (!xdr_check_write_chunk(rctxt, MAX_BYTES_WRITE_CHUNK))
return false;
++chcount;
p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
if (!p) if (!p)
return NULL; return false;
if (chcount++ > 1)
return NULL;
} }
return p; if (!chcount)
rctxt->rc_write_list = NULL;
return chcount < 2;
} }
/* Sanity check the Reply chunk. /* Sanity check the Reply chunk.
* *
* Sanity checks: * Sanity checks:
* - Reply chunk does not overflow buffer. * - Reply chunk does not overflow Receive buffer.
* - Segment size limited by largest NFS data payload. * - Chunk size limited by largest NFS data payload.
* *
* Returns pointer to the following RPC header. * Return values:
* %true: Reply chunk is valid. @rctxt's xdr_stream is updated
* to point to the first byte past the Reply chunk.
* %false: Reply chunk is corrupt. @rctxt's xdr_stream is left
* in an unknown state.
*/ */
static __be32 *xdr_check_reply_chunk(__be32 *p, const __be32 *end) static bool xdr_check_reply_chunk(struct svc_rdma_recv_ctxt *rctxt)
{ {
if (*p++ != xdr_zero) { __be32 *p;
p = xdr_check_write_chunk(p, end, MAX_BYTES_SPECIAL_SEG);
if (!p) p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
return NULL; if (!p)
return false;
rctxt->rc_reply_chunk = p;
if (*p != xdr_zero) {
if (!xdr_check_write_chunk(rctxt, MAX_BYTES_SPECIAL_CHUNK))
return false;
} else {
rctxt->rc_reply_chunk = NULL;
} }
return p; return true;
} }
/* RPC-over-RDMA Version One private extension: Remote Invalidation. /* RPC-over-RDMA Version One private extension: Remote Invalidation.
@ -537,60 +584,61 @@ static void svc_rdma_get_inv_rkey(struct svcxprt_rdma *rdma,
ctxt->rc_inv_rkey = be32_to_cpu(inv_rkey); ctxt->rc_inv_rkey = be32_to_cpu(inv_rkey);
} }
/* On entry, xdr->head[0].iov_base points to first byte in the /**
* RPC-over-RDMA header. * svc_rdma_xdr_decode_req - Decode the transport header
* @rq_arg: xdr_buf containing ingress RPC/RDMA message
* @rctxt: state of decoding
*
* On entry, xdr->head[0].iov_base points to first byte of the
* RPC-over-RDMA transport header.
* *
* On successful exit, head[0] points to first byte past the * On successful exit, head[0] points to first byte past the
* RPC-over-RDMA header. For RDMA_MSG, this is the RPC message. * RPC-over-RDMA header. For RDMA_MSG, this is the RPC message.
*
* The length of the RPC-over-RDMA header is returned. * The length of the RPC-over-RDMA header is returned.
* *
* Assumptions: * Assumptions:
* - The transport header is entirely contained in the head iovec. * - The transport header is entirely contained in the head iovec.
*/ */
static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg) static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg,
struct svc_rdma_recv_ctxt *rctxt)
{ {
__be32 *p, *end, *rdma_argp; __be32 *p, *rdma_argp;
unsigned int hdr_len; unsigned int hdr_len;
/* Verify that there's enough bytes for header + something */
if (rq_arg->len <= RPCRDMA_HDRLEN_ERR)
goto out_short;
rdma_argp = rq_arg->head[0].iov_base; rdma_argp = rq_arg->head[0].iov_base;
if (*(rdma_argp + 1) != rpcrdma_version) xdr_init_decode(&rctxt->rc_stream, rq_arg, rdma_argp, NULL);
goto out_version;
switch (*(rdma_argp + 3)) { p = xdr_inline_decode(&rctxt->rc_stream,
rpcrdma_fixed_maxsz * sizeof(*p));
if (unlikely(!p))
goto out_short;
p++;
if (*p != rpcrdma_version)
goto out_version;
p += 2;
switch (*p) {
case rdma_msg: case rdma_msg:
break; break;
case rdma_nomsg: case rdma_nomsg:
break; break;
case rdma_done: case rdma_done:
goto out_drop; goto out_drop;
case rdma_error: case rdma_error:
goto out_drop; goto out_drop;
default: default:
goto out_proc; goto out_proc;
} }
end = (__be32 *)((unsigned long)rdma_argp + rq_arg->len); if (!xdr_check_read_list(rctxt))
p = xdr_check_read_list(rdma_argp + 4, end);
if (!p)
goto out_inval; goto out_inval;
p = xdr_check_write_list(p, end); if (!xdr_check_write_list(rctxt))
if (!p)
goto out_inval; goto out_inval;
p = xdr_check_reply_chunk(p, end); if (!xdr_check_reply_chunk(rctxt))
if (!p)
goto out_inval;
if (p > end)
goto out_inval; goto out_inval;
rq_arg->head[0].iov_base = p; rq_arg->head[0].iov_base = rctxt->rc_stream.p;
hdr_len = (unsigned long)p - (unsigned long)rdma_argp; hdr_len = xdr_stream_pos(&rctxt->rc_stream);
rq_arg->head[0].iov_len -= hdr_len; rq_arg->head[0].iov_len -= hdr_len;
rq_arg->len -= hdr_len; rq_arg->len -= hdr_len;
trace_svcrdma_decode_rqst(rdma_argp, hdr_len); trace_svcrdma_decode_rqst(rdma_argp, hdr_len);
@ -650,7 +698,6 @@ static void svc_rdma_send_error(struct svcxprt_rdma *xprt,
__be32 *rdma_argp, int status) __be32 *rdma_argp, int status)
{ {
struct svc_rdma_send_ctxt *ctxt; struct svc_rdma_send_ctxt *ctxt;
unsigned int length;
__be32 *p; __be32 *p;
int ret; int ret;
@ -658,29 +705,46 @@ static void svc_rdma_send_error(struct svcxprt_rdma *xprt,
if (!ctxt) if (!ctxt)
return; return;
p = ctxt->sc_xprt_buf; p = xdr_reserve_space(&ctxt->sc_stream,
rpcrdma_fixed_maxsz * sizeof(*p));
if (!p)
goto put_ctxt;
*p++ = *rdma_argp; *p++ = *rdma_argp;
*p++ = *(rdma_argp + 1); *p++ = *(rdma_argp + 1);
*p++ = xprt->sc_fc_credits; *p++ = xprt->sc_fc_credits;
*p++ = rdma_error; *p = rdma_error;
switch (status) { switch (status) {
case -EPROTONOSUPPORT: case -EPROTONOSUPPORT:
p = xdr_reserve_space(&ctxt->sc_stream, 3 * sizeof(*p));
if (!p)
goto put_ctxt;
*p++ = err_vers; *p++ = err_vers;
*p++ = rpcrdma_version; *p++ = rpcrdma_version;
*p++ = rpcrdma_version; *p = rpcrdma_version;
trace_svcrdma_err_vers(*rdma_argp); trace_svcrdma_err_vers(*rdma_argp);
break; break;
default: default:
*p++ = err_chunk; p = xdr_reserve_space(&ctxt->sc_stream, sizeof(*p));
if (!p)
goto put_ctxt;
*p = err_chunk;
trace_svcrdma_err_chunk(*rdma_argp); trace_svcrdma_err_chunk(*rdma_argp);
} }
length = (unsigned long)p - (unsigned long)ctxt->sc_xprt_buf;
svc_rdma_sync_reply_hdr(xprt, ctxt, length);
ctxt->sc_send_wr.num_sge = 1;
ctxt->sc_send_wr.opcode = IB_WR_SEND; ctxt->sc_send_wr.opcode = IB_WR_SEND;
ctxt->sc_sges[0].length = ctxt->sc_hdrbuf.len;
ret = svc_rdma_send(xprt, &ctxt->sc_send_wr); ret = svc_rdma_send(xprt, &ctxt->sc_send_wr);
if (ret) if (ret)
svc_rdma_send_ctxt_put(xprt, ctxt); goto put_ctxt;
return;
put_ctxt:
svc_rdma_send_ctxt_put(xprt, ctxt);
} }
/* By convention, backchannel calls arrive via rdma_msg type /* By convention, backchannel calls arrive via rdma_msg type
@ -785,7 +849,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
rqstp->rq_next_page = rqstp->rq_respages; rqstp->rq_next_page = rqstp->rq_respages;
p = (__be32 *)rqstp->rq_arg.head[0].iov_base; p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg); ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg, ctxt);
if (ret < 0) if (ret < 0)
goto out_err; goto out_err;
if (ret == 0) if (ret == 0)

View File

@ -41,7 +41,7 @@ struct svc_rdma_rw_ctxt {
struct rdma_rw_ctx rw_ctx; struct rdma_rw_ctx rw_ctx;
int rw_nents; int rw_nents;
struct sg_table rw_sg_table; struct sg_table rw_sg_table;
struct scatterlist rw_first_sgl[0]; struct scatterlist rw_first_sgl[];
}; };
static inline struct svc_rdma_rw_ctxt * static inline struct svc_rdma_rw_ctxt *
@ -439,7 +439,8 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info,
if (ret < 0) if (ret < 0)
goto out_initerr; goto out_initerr;
trace_svcrdma_encode_wseg(seg_handle, write_len, seg_offset); trace_svcrdma_send_wseg(seg_handle, write_len, seg_offset);
list_add(&ctxt->rw_list, &cc->cc_rwctxts); list_add(&ctxt->rw_list, &cc->cc_rwctxts);
cc->cc_sqecount += ret; cc->cc_sqecount += ret;
if (write_len == seg_length - info->wi_seg_off) { if (write_len == seg_length - info->wi_seg_off) {
@ -482,18 +483,19 @@ static int svc_rdma_send_xdr_kvec(struct svc_rdma_write_info *info,
vec->iov_len); vec->iov_len);
} }
/* Send an xdr_buf's page list by itself. A Write chunk is /* Send an xdr_buf's page list by itself. A Write chunk is just
* just the page list. a Reply chunk is the head, page list, * the page list. A Reply chunk is @xdr's head, page list, and
* and tail. This function is shared between the two types * tail. This function is shared between the two types of chunk.
* of chunk.
*/ */
static int svc_rdma_send_xdr_pagelist(struct svc_rdma_write_info *info, static int svc_rdma_send_xdr_pagelist(struct svc_rdma_write_info *info,
struct xdr_buf *xdr) struct xdr_buf *xdr,
unsigned int offset,
unsigned long length)
{ {
info->wi_xdr = xdr; info->wi_xdr = xdr;
info->wi_next_off = 0; info->wi_next_off = offset - xdr->head[0].iov_len;
return svc_rdma_build_writes(info, svc_rdma_pagelist_to_sg, return svc_rdma_build_writes(info, svc_rdma_pagelist_to_sg,
xdr->page_len); length);
} }
/** /**
@ -501,6 +503,8 @@ static int svc_rdma_send_xdr_pagelist(struct svc_rdma_write_info *info,
* @rdma: controlling RDMA transport * @rdma: controlling RDMA transport
* @wr_ch: Write chunk provided by client * @wr_ch: Write chunk provided by client
* @xdr: xdr_buf containing the data payload * @xdr: xdr_buf containing the data payload
* @offset: payload's byte offset in @xdr
* @length: size of payload, in bytes
* *
* Returns a non-negative number of bytes the chunk consumed, or * Returns a non-negative number of bytes the chunk consumed, or
* %-E2BIG if the payload was larger than the Write chunk, * %-E2BIG if the payload was larger than the Write chunk,
@ -510,19 +514,20 @@ static int svc_rdma_send_xdr_pagelist(struct svc_rdma_write_info *info,
* %-EIO if rdma_rw initialization failed (DMA mapping, etc). * %-EIO if rdma_rw initialization failed (DMA mapping, etc).
*/ */
int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, __be32 *wr_ch, int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, __be32 *wr_ch,
struct xdr_buf *xdr) struct xdr_buf *xdr,
unsigned int offset, unsigned long length)
{ {
struct svc_rdma_write_info *info; struct svc_rdma_write_info *info;
int ret; int ret;
if (!xdr->page_len) if (!length)
return 0; return 0;
info = svc_rdma_write_info_alloc(rdma, wr_ch); info = svc_rdma_write_info_alloc(rdma, wr_ch);
if (!info) if (!info)
return -ENOMEM; return -ENOMEM;
ret = svc_rdma_send_xdr_pagelist(info, xdr); ret = svc_rdma_send_xdr_pagelist(info, xdr, offset, length);
if (ret < 0) if (ret < 0)
goto out_err; goto out_err;
@ -530,8 +535,8 @@ int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, __be32 *wr_ch,
if (ret < 0) if (ret < 0)
goto out_err; goto out_err;
trace_svcrdma_encode_write(xdr->page_len); trace_svcrdma_send_write_chunk(xdr->page_len);
return xdr->page_len; return length;
out_err: out_err:
svc_rdma_write_info_free(info); svc_rdma_write_info_free(info);
@ -541,8 +546,7 @@ out_err:
/** /**
* svc_rdma_send_reply_chunk - Write all segments in the Reply chunk * svc_rdma_send_reply_chunk - Write all segments in the Reply chunk
* @rdma: controlling RDMA transport * @rdma: controlling RDMA transport
* @rp_ch: Reply chunk provided by client * @rctxt: Write and Reply chunks from client
* @writelist: true if client provided a Write list
* @xdr: xdr_buf containing an RPC Reply * @xdr: xdr_buf containing an RPC Reply
* *
* Returns a non-negative number of bytes the chunk consumed, or * Returns a non-negative number of bytes the chunk consumed, or
@ -552,13 +556,14 @@ out_err:
* %-ENOTCONN if posting failed (connection is lost), * %-ENOTCONN if posting failed (connection is lost),
* %-EIO if rdma_rw initialization failed (DMA mapping, etc). * %-EIO if rdma_rw initialization failed (DMA mapping, etc).
*/ */
int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, __be32 *rp_ch, int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma,
bool writelist, struct xdr_buf *xdr) const struct svc_rdma_recv_ctxt *rctxt,
struct xdr_buf *xdr)
{ {
struct svc_rdma_write_info *info; struct svc_rdma_write_info *info;
int consumed, ret; int consumed, ret;
info = svc_rdma_write_info_alloc(rdma, rp_ch); info = svc_rdma_write_info_alloc(rdma, rctxt->rc_reply_chunk);
if (!info) if (!info)
return -ENOMEM; return -ENOMEM;
@ -570,8 +575,10 @@ int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, __be32 *rp_ch,
/* Send the page list in the Reply chunk only if the /* Send the page list in the Reply chunk only if the
* client did not provide Write chunks. * client did not provide Write chunks.
*/ */
if (!writelist && xdr->page_len) { if (!rctxt->rc_write_list && xdr->page_len) {
ret = svc_rdma_send_xdr_pagelist(info, xdr); ret = svc_rdma_send_xdr_pagelist(info, xdr,
xdr->head[0].iov_len,
xdr->page_len);
if (ret < 0) if (ret < 0)
goto out_err; goto out_err;
consumed += xdr->page_len; consumed += xdr->page_len;
@ -588,7 +595,7 @@ int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, __be32 *rp_ch,
if (ret < 0) if (ret < 0)
goto out_err; goto out_err;
trace_svcrdma_encode_reply(consumed); trace_svcrdma_send_reply_chunk(consumed);
return consumed; return consumed;
out_err: out_err:
@ -691,7 +698,7 @@ static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp,
if (ret < 0) if (ret < 0)
break; break;
trace_svcrdma_encode_rseg(rs_handle, rs_length, rs_offset); trace_svcrdma_send_rseg(rs_handle, rs_length, rs_offset);
info->ri_chunklen += rs_length; info->ri_chunklen += rs_length;
} }
@ -722,7 +729,7 @@ static int svc_rdma_build_normal_read_chunk(struct svc_rqst *rqstp,
if (ret < 0) if (ret < 0)
goto out; goto out;
trace_svcrdma_encode_read(info->ri_chunklen, info->ri_position); trace_svcrdma_send_read_chunk(info->ri_chunklen, info->ri_position);
head->rc_hdr_count = 0; head->rc_hdr_count = 0;
@ -778,7 +785,7 @@ static int svc_rdma_build_pz_read_chunk(struct svc_rqst *rqstp,
if (ret < 0) if (ret < 0)
goto out; goto out;
trace_svcrdma_encode_pzr(info->ri_chunklen); trace_svcrdma_send_pzr(info->ri_chunklen);
head->rc_arg.len += info->ri_chunklen; head->rc_arg.len += info->ri_chunklen;
head->rc_arg.buflen += info->ri_chunklen; head->rc_arg.buflen += info->ri_chunklen;

View File

@ -151,6 +151,8 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED; ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED;
ctxt->sc_cqe.done = svc_rdma_wc_send; ctxt->sc_cqe.done = svc_rdma_wc_send;
ctxt->sc_xprt_buf = buffer; ctxt->sc_xprt_buf = buffer;
xdr_buf_init(&ctxt->sc_hdrbuf, ctxt->sc_xprt_buf,
rdma->sc_max_req_size);
ctxt->sc_sges[0].addr = addr; ctxt->sc_sges[0].addr = addr;
for (i = 0; i < rdma->sc_max_send_sges; i++) for (i = 0; i < rdma->sc_max_send_sges; i++)
@ -204,6 +206,10 @@ struct svc_rdma_send_ctxt *svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma)
spin_unlock(&rdma->sc_send_lock); spin_unlock(&rdma->sc_send_lock);
out: out:
rpcrdma_set_xdrlen(&ctxt->sc_hdrbuf, 0);
xdr_init_encode(&ctxt->sc_stream, &ctxt->sc_hdrbuf,
ctxt->sc_xprt_buf, NULL);
ctxt->sc_send_wr.num_sge = 0; ctxt->sc_send_wr.num_sge = 0;
ctxt->sc_cur_sge_no = 0; ctxt->sc_cur_sge_no = 0;
ctxt->sc_page_count = 0; ctxt->sc_page_count = 0;
@ -295,6 +301,12 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr)
might_sleep(); might_sleep();
/* Sync the transport header buffer */
ib_dma_sync_single_for_device(rdma->sc_pd->device,
wr->sg_list[0].addr,
wr->sg_list[0].length,
DMA_TO_DEVICE);
/* If the SQ is full, wait until an SQ entry is available */ /* If the SQ is full, wait until an SQ entry is available */
while (1) { while (1) {
if ((atomic_dec_return(&rdma->sc_sq_avail) < 0)) { if ((atomic_dec_return(&rdma->sc_sq_avail) < 0)) {
@ -322,166 +334,173 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr)
return ret; return ret;
} }
static u32 xdr_padsize(u32 len) /**
{ * svc_rdma_encode_read_list - Encode RPC Reply's Read chunk list
return (len & 3) ? (4 - (len & 3)) : 0; * @sctxt: Send context for the RPC Reply
}
/* Returns length of transport header, in bytes.
*/
static unsigned int svc_rdma_reply_hdr_len(__be32 *rdma_resp)
{
unsigned int nsegs;
__be32 *p;
p = rdma_resp;
/* RPC-over-RDMA V1 replies never have a Read list. */
p += rpcrdma_fixed_maxsz + 1;
/* Skip Write list. */
while (*p++ != xdr_zero) {
nsegs = be32_to_cpup(p++);
p += nsegs * rpcrdma_segment_maxsz;
}
/* Skip Reply chunk. */
if (*p++ != xdr_zero) {
nsegs = be32_to_cpup(p++);
p += nsegs * rpcrdma_segment_maxsz;
}
return (unsigned long)p - (unsigned long)rdma_resp;
}
/* One Write chunk is copied from Call transport header to Reply
* transport header. Each segment's length field is updated to
* reflect number of bytes consumed in the segment.
* *
* Returns number of segments in this chunk. * Return values:
* On success, returns length in bytes of the Reply XDR buffer
* that was consumed by the Reply Read list
* %-EMSGSIZE on XDR buffer overflow
*/ */
static unsigned int xdr_encode_write_chunk(__be32 *dst, __be32 *src, static ssize_t svc_rdma_encode_read_list(struct svc_rdma_send_ctxt *sctxt)
{
/* RPC-over-RDMA version 1 replies never have a Read list. */
return xdr_stream_encode_item_absent(&sctxt->sc_stream);
}
/**
* svc_rdma_encode_write_segment - Encode one Write segment
* @src: matching Write chunk in the RPC Call header
* @sctxt: Send context for the RPC Reply
* @remaining: remaining bytes of the payload left in the Write chunk
*
* Return values:
* On success, returns length in bytes of the Reply XDR buffer
* that was consumed by the Write segment
* %-EMSGSIZE on XDR buffer overflow
*/
static ssize_t svc_rdma_encode_write_segment(__be32 *src,
struct svc_rdma_send_ctxt *sctxt,
unsigned int *remaining)
{
__be32 *p;
const size_t len = rpcrdma_segment_maxsz * sizeof(*p);
u32 handle, length;
u64 offset;
p = xdr_reserve_space(&sctxt->sc_stream, len);
if (!p)
return -EMSGSIZE;
handle = be32_to_cpup(src++);
length = be32_to_cpup(src++);
xdr_decode_hyper(src, &offset);
*p++ = cpu_to_be32(handle);
if (*remaining < length) {
/* segment only partly filled */
length = *remaining;
*remaining = 0;
} else {
/* entire segment was consumed */
*remaining -= length;
}
*p++ = cpu_to_be32(length);
xdr_encode_hyper(p, offset);
trace_svcrdma_encode_wseg(handle, length, offset);
return len;
}
/**
* svc_rdma_encode_write_chunk - Encode one Write chunk
* @src: matching Write chunk in the RPC Call header
* @sctxt: Send context for the RPC Reply
* @remaining: size in bytes of the payload in the Write chunk
*
* Copy a Write chunk from the Call transport header to the
* Reply transport header. Update each segment's length field
* to reflect the number of bytes written in that segment.
*
* Return values:
* On success, returns length in bytes of the Reply XDR buffer
* that was consumed by the Write chunk
* %-EMSGSIZE on XDR buffer overflow
*/
static ssize_t svc_rdma_encode_write_chunk(__be32 *src,
struct svc_rdma_send_ctxt *sctxt,
unsigned int remaining) unsigned int remaining)
{ {
unsigned int i, nsegs; unsigned int i, nsegs;
u32 seg_len; ssize_t len, ret;
/* Write list discriminator */ len = 0;
*dst++ = *src++; trace_svcrdma_encode_write_chunk(remaining);
/* number of segments in this chunk */ src++;
nsegs = be32_to_cpup(src); ret = xdr_stream_encode_item_present(&sctxt->sc_stream);
*dst++ = *src++; if (ret < 0)
return -EMSGSIZE;
len += ret;
nsegs = be32_to_cpup(src++);
ret = xdr_stream_encode_u32(&sctxt->sc_stream, nsegs);
if (ret < 0)
return -EMSGSIZE;
len += ret;
for (i = nsegs; i; i--) { for (i = nsegs; i; i--) {
/* segment's RDMA handle */ ret = svc_rdma_encode_write_segment(src, sctxt, &remaining);
*dst++ = *src++; if (ret < 0)
return -EMSGSIZE;
/* bytes returned in this segment */ src += rpcrdma_segment_maxsz;
seg_len = be32_to_cpu(*src); len += ret;
if (remaining >= seg_len) {
/* entire segment was consumed */
*dst = *src;
remaining -= seg_len;
} else {
/* segment only partly filled */
*dst = cpu_to_be32(remaining);
remaining = 0;
}
dst++; src++;
/* segment's RDMA offset */
*dst++ = *src++;
*dst++ = *src++;
} }
return nsegs; return len;
} }
/* The client provided a Write list in the Call message. Fill in /**
* the segments in the first Write chunk in the Reply's transport * svc_rdma_encode_write_list - Encode RPC Reply's Write chunk list
* @rctxt: Reply context with information about the RPC Call
* @sctxt: Send context for the RPC Reply
* @length: size in bytes of the payload in the first Write chunk
*
* The client provides a Write chunk list in the Call message. Fill
* in the segments in the first Write chunk in the Reply's transport
* header with the number of bytes consumed in each segment. * header with the number of bytes consumed in each segment.
* Remaining chunks are returned unused. * Remaining chunks are returned unused.
* *
* Assumptions: * Assumptions:
* - Client has provided only one Write chunk * - Client has provided only one Write chunk
*
* Return values:
* On success, returns length in bytes of the Reply XDR buffer
* that was consumed by the Reply's Write list
* %-EMSGSIZE on XDR buffer overflow
*/ */
static void svc_rdma_xdr_encode_write_list(__be32 *rdma_resp, __be32 *wr_ch, static ssize_t
unsigned int consumed) svc_rdma_encode_write_list(const struct svc_rdma_recv_ctxt *rctxt,
struct svc_rdma_send_ctxt *sctxt,
unsigned int length)
{ {
unsigned int nsegs; ssize_t len, ret;
__be32 *p, *q;
/* RPC-over-RDMA V1 replies never have a Read list. */ ret = svc_rdma_encode_write_chunk(rctxt->rc_write_list, sctxt, length);
p = rdma_resp + rpcrdma_fixed_maxsz + 1; if (ret < 0)
return ret;
len = ret;
q = wr_ch; /* Terminate the Write list */
while (*q != xdr_zero) { ret = xdr_stream_encode_item_absent(&sctxt->sc_stream);
nsegs = xdr_encode_write_chunk(p, q, consumed); if (ret < 0)
q += 2 + nsegs * rpcrdma_segment_maxsz; return ret;
p += 2 + nsegs * rpcrdma_segment_maxsz;
consumed = 0;
}
/* Terminate Write list */ return len + ret;
*p++ = xdr_zero;
/* Reply chunk discriminator; may be replaced later */
*p = xdr_zero;
} }
/* The client provided a Reply chunk in the Call message. Fill in /**
* the segments in the Reply chunk in the Reply message with the * svc_rdma_encode_reply_chunk - Encode RPC Reply's Reply chunk
* number of bytes consumed in each segment. * @rctxt: Reply context with information about the RPC Call
* @sctxt: Send context for the RPC Reply
* @length: size in bytes of the payload in the Reply chunk
* *
* Assumptions: * Assumptions:
* - Reply can always fit in the provided Reply chunk * - Reply can always fit in the client-provided Reply chunk
*
* Return values:
* On success, returns length in bytes of the Reply XDR buffer
* that was consumed by the Reply's Reply chunk
* %-EMSGSIZE on XDR buffer overflow
*/ */
static void svc_rdma_xdr_encode_reply_chunk(__be32 *rdma_resp, __be32 *rp_ch, static ssize_t
unsigned int consumed) svc_rdma_encode_reply_chunk(const struct svc_rdma_recv_ctxt *rctxt,
struct svc_rdma_send_ctxt *sctxt,
unsigned int length)
{ {
__be32 *p; return svc_rdma_encode_write_chunk(rctxt->rc_reply_chunk, sctxt,
length);
/* Find the Reply chunk in the Reply's xprt header.
* RPC-over-RDMA V1 replies never have a Read list.
*/
p = rdma_resp + rpcrdma_fixed_maxsz + 1;
/* Skip past Write list */
while (*p++ != xdr_zero)
p += 1 + be32_to_cpup(p) * rpcrdma_segment_maxsz;
xdr_encode_write_chunk(p, rp_ch, consumed);
}
/* Parse the RPC Call's transport header.
*/
static void svc_rdma_get_write_arrays(__be32 *rdma_argp,
__be32 **write, __be32 **reply)
{
__be32 *p;
p = rdma_argp + rpcrdma_fixed_maxsz;
/* Read list */
while (*p++ != xdr_zero)
p += 5;
/* Write list */
if (*p != xdr_zero) {
*write = p;
while (*p++ != xdr_zero)
p += 1 + be32_to_cpu(*p) * 4;
} else {
*write = NULL;
p++;
}
/* Reply chunk */
if (*p != xdr_zero)
*reply = p;
else
*reply = NULL;
} }
static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma, static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma,
@ -520,38 +539,36 @@ static int svc_rdma_dma_map_buf(struct svcxprt_rdma *rdma,
} }
/** /**
* svc_rdma_sync_reply_hdr - DMA sync the transport header buffer * svc_rdma_pull_up_needed - Determine whether to use pull-up
* @rdma: controlling transport * @rdma: controlling transport
* @ctxt: send_ctxt for the Send WR * @sctxt: send_ctxt for the Send WR
* @len: length of transport header * @rctxt: Write and Reply chunks provided by client
* @xdr: xdr_buf containing RPC message to transmit
* *
*/ * Returns:
void svc_rdma_sync_reply_hdr(struct svcxprt_rdma *rdma, * %true if pull-up must be used
struct svc_rdma_send_ctxt *ctxt, * %false otherwise
unsigned int len)
{
ctxt->sc_sges[0].length = len;
ctxt->sc_send_wr.num_sge++;
ib_dma_sync_single_for_device(rdma->sc_pd->device,
ctxt->sc_sges[0].addr, len,
DMA_TO_DEVICE);
}
/* If the xdr_buf has more elements than the device can
* transmit in a single RDMA Send, then the reply will
* have to be copied into a bounce buffer.
*/ */
static bool svc_rdma_pull_up_needed(struct svcxprt_rdma *rdma, static bool svc_rdma_pull_up_needed(struct svcxprt_rdma *rdma,
struct xdr_buf *xdr, struct svc_rdma_send_ctxt *sctxt,
__be32 *wr_lst) const struct svc_rdma_recv_ctxt *rctxt,
struct xdr_buf *xdr)
{ {
int elements; int elements;
/* For small messages, copying bytes is cheaper than DMA mapping.
*/
if (sctxt->sc_hdrbuf.len + xdr->len < RPCRDMA_PULLUP_THRESH)
return true;
/* Check whether the xdr_buf has more elements than can
* fit in a single RDMA Send.
*/
/* xdr->head */ /* xdr->head */
elements = 1; elements = 1;
/* xdr->pages */ /* xdr->pages */
if (!wr_lst) { if (!rctxt || !rctxt->rc_write_list) {
unsigned int remaining; unsigned int remaining;
unsigned long pageoff; unsigned long pageoff;
@ -573,29 +590,36 @@ static bool svc_rdma_pull_up_needed(struct svcxprt_rdma *rdma,
return elements >= rdma->sc_max_send_sges; return elements >= rdma->sc_max_send_sges;
} }
/* The device is not capable of sending the reply directly. /**
* Assemble the elements of @xdr into the transport header * svc_rdma_pull_up_reply_msg - Copy Reply into a single buffer
* buffer. * @rdma: controlling transport
* @sctxt: send_ctxt for the Send WR; xprt hdr is already prepared
* @rctxt: Write and Reply chunks provided by client
* @xdr: prepared xdr_buf containing RPC message
*
* The device is not capable of sending the reply directly.
* Assemble the elements of @xdr into the transport header buffer.
*
* Returns zero on success, or a negative errno on failure.
*/ */
static int svc_rdma_pull_up_reply_msg(struct svcxprt_rdma *rdma, static int svc_rdma_pull_up_reply_msg(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *ctxt, struct svc_rdma_send_ctxt *sctxt,
struct xdr_buf *xdr, __be32 *wr_lst) const struct svc_rdma_recv_ctxt *rctxt,
const struct xdr_buf *xdr)
{ {
unsigned char *dst, *tailbase; unsigned char *dst, *tailbase;
unsigned int taillen; unsigned int taillen;
dst = ctxt->sc_xprt_buf; dst = sctxt->sc_xprt_buf + sctxt->sc_hdrbuf.len;
dst += ctxt->sc_sges[0].length;
memcpy(dst, xdr->head[0].iov_base, xdr->head[0].iov_len); memcpy(dst, xdr->head[0].iov_base, xdr->head[0].iov_len);
dst += xdr->head[0].iov_len; dst += xdr->head[0].iov_len;
tailbase = xdr->tail[0].iov_base; tailbase = xdr->tail[0].iov_base;
taillen = xdr->tail[0].iov_len; taillen = xdr->tail[0].iov_len;
if (wr_lst) { if (rctxt && rctxt->rc_write_list) {
u32 xdrpad; u32 xdrpad;
xdrpad = xdr_padsize(xdr->page_len); xdrpad = xdr_pad_size(xdr->page_len);
if (taillen && xdrpad) { if (taillen && xdrpad) {
tailbase += xdrpad; tailbase += xdrpad;
taillen -= xdrpad; taillen -= xdrpad;
@ -621,29 +645,26 @@ static int svc_rdma_pull_up_reply_msg(struct svcxprt_rdma *rdma,
if (taillen) if (taillen)
memcpy(dst, tailbase, taillen); memcpy(dst, tailbase, taillen);
ctxt->sc_sges[0].length += xdr->len; sctxt->sc_sges[0].length += xdr->len;
ib_dma_sync_single_for_device(rdma->sc_pd->device, trace_svcrdma_send_pullup(sctxt->sc_sges[0].length);
ctxt->sc_sges[0].addr,
ctxt->sc_sges[0].length,
DMA_TO_DEVICE);
return 0; return 0;
} }
/* svc_rdma_map_reply_msg - Map the buffer holding RPC message /* svc_rdma_map_reply_msg - DMA map the buffer holding RPC message
* @rdma: controlling transport * @rdma: controlling transport
* @ctxt: send_ctxt for the Send WR * @sctxt: send_ctxt for the Send WR
* @rctxt: Write and Reply chunks provided by client
* @xdr: prepared xdr_buf containing RPC message * @xdr: prepared xdr_buf containing RPC message
* @wr_lst: pointer to Call header's Write list, or NULL
* *
* Load the xdr_buf into the ctxt's sge array, and DMA map each * Load the xdr_buf into the ctxt's sge array, and DMA map each
* element as it is added. * element as it is added. The Send WR's num_sge field is set.
* *
* Returns zero on success, or a negative errno on failure. * Returns zero on success, or a negative errno on failure.
*/ */
int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *ctxt, struct svc_rdma_send_ctxt *sctxt,
struct xdr_buf *xdr, __be32 *wr_lst) const struct svc_rdma_recv_ctxt *rctxt,
struct xdr_buf *xdr)
{ {
unsigned int len, remaining; unsigned int len, remaining;
unsigned long page_off; unsigned long page_off;
@ -652,11 +673,24 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
u32 xdr_pad; u32 xdr_pad;
int ret; int ret;
if (svc_rdma_pull_up_needed(rdma, xdr, wr_lst)) /* Set up the (persistently-mapped) transport header SGE. */
return svc_rdma_pull_up_reply_msg(rdma, ctxt, xdr, wr_lst); sctxt->sc_send_wr.num_sge = 1;
sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len;
++ctxt->sc_cur_sge_no; /* If there is a Reply chunk, nothing follows the transport
ret = svc_rdma_dma_map_buf(rdma, ctxt, * header, and we're done here.
*/
if (rctxt && rctxt->rc_reply_chunk)
return 0;
/* For pull-up, svc_rdma_send() will sync the transport header.
* No additional DMA mapping is necessary.
*/
if (svc_rdma_pull_up_needed(rdma, sctxt, rctxt, xdr))
return svc_rdma_pull_up_reply_msg(rdma, sctxt, rctxt, xdr);
++sctxt->sc_cur_sge_no;
ret = svc_rdma_dma_map_buf(rdma, sctxt,
xdr->head[0].iov_base, xdr->head[0].iov_base,
xdr->head[0].iov_len); xdr->head[0].iov_len);
if (ret < 0) if (ret < 0)
@ -667,10 +701,10 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
* have added XDR padding in the tail buffer, and that * have added XDR padding in the tail buffer, and that
* should not be included inline. * should not be included inline.
*/ */
if (wr_lst) { if (rctxt && rctxt->rc_write_list) {
base = xdr->tail[0].iov_base; base = xdr->tail[0].iov_base;
len = xdr->tail[0].iov_len; len = xdr->tail[0].iov_len;
xdr_pad = xdr_padsize(xdr->page_len); xdr_pad = xdr_pad_size(xdr->page_len);
if (len && xdr_pad) { if (len && xdr_pad) {
base += xdr_pad; base += xdr_pad;
@ -686,8 +720,8 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
while (remaining) { while (remaining) {
len = min_t(u32, PAGE_SIZE - page_off, remaining); len = min_t(u32, PAGE_SIZE - page_off, remaining);
++ctxt->sc_cur_sge_no; ++sctxt->sc_cur_sge_no;
ret = svc_rdma_dma_map_page(rdma, ctxt, *ppages++, ret = svc_rdma_dma_map_page(rdma, sctxt, *ppages++,
page_off, len); page_off, len);
if (ret < 0) if (ret < 0)
return ret; return ret;
@ -700,8 +734,8 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
len = xdr->tail[0].iov_len; len = xdr->tail[0].iov_len;
tail: tail:
if (len) { if (len) {
++ctxt->sc_cur_sge_no; ++sctxt->sc_cur_sge_no;
ret = svc_rdma_dma_map_buf(rdma, ctxt, base, len); ret = svc_rdma_dma_map_buf(rdma, sctxt, base, len);
if (ret < 0) if (ret < 0)
return ret; return ret;
} }
@ -748,18 +782,14 @@ static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
*/ */
static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma, static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *sctxt, struct svc_rdma_send_ctxt *sctxt,
struct svc_rdma_recv_ctxt *rctxt, const struct svc_rdma_recv_ctxt *rctxt,
struct svc_rqst *rqstp, struct svc_rqst *rqstp)
__be32 *wr_lst, __be32 *rp_ch)
{ {
int ret; int ret;
if (!rp_ch) { ret = svc_rdma_map_reply_msg(rdma, sctxt, rctxt, &rqstp->rq_res);
ret = svc_rdma_map_reply_msg(rdma, sctxt, if (ret < 0)
&rqstp->rq_res, wr_lst); return ret;
if (ret < 0)
return ret;
}
svc_rdma_save_io_pages(rqstp, sctxt); svc_rdma_save_io_pages(rqstp, sctxt);
@ -769,8 +799,6 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
} else { } else {
sctxt->sc_send_wr.opcode = IB_WR_SEND; sctxt->sc_send_wr.opcode = IB_WR_SEND;
} }
dprintk("svcrdma: posting Send WR with %u sge(s)\n",
sctxt->sc_send_wr.num_sge);
return svc_rdma_send(rdma, &sctxt->sc_send_wr); return svc_rdma_send(rdma, &sctxt->sc_send_wr);
} }
@ -785,26 +813,31 @@ static int svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *ctxt, struct svc_rdma_send_ctxt *ctxt,
struct svc_rqst *rqstp) struct svc_rqst *rqstp)
{ {
struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt;
__be32 *rdma_argp = rctxt->rc_recv_buf;
__be32 *p; __be32 *p;
int ret;
p = ctxt->sc_xprt_buf; rpcrdma_set_xdrlen(&ctxt->sc_hdrbuf, 0);
trace_svcrdma_err_chunk(*p); xdr_init_encode(&ctxt->sc_stream, &ctxt->sc_hdrbuf, ctxt->sc_xprt_buf,
p += 3; NULL);
p = xdr_reserve_space(&ctxt->sc_stream, RPCRDMA_HDRLEN_ERR);
if (!p)
return -ENOMSG;
*p++ = *rdma_argp;
*p++ = *(rdma_argp + 1);
*p++ = rdma->sc_fc_credits;
*p++ = rdma_error; *p++ = rdma_error;
*p = err_chunk; *p = err_chunk;
svc_rdma_sync_reply_hdr(rdma, ctxt, RPCRDMA_HDRLEN_ERR); trace_svcrdma_err_chunk(*rdma_argp);
svc_rdma_save_io_pages(rqstp, ctxt); svc_rdma_save_io_pages(rqstp, ctxt);
ctxt->sc_send_wr.num_sge = 1;
ctxt->sc_send_wr.opcode = IB_WR_SEND; ctxt->sc_send_wr.opcode = IB_WR_SEND;
ret = svc_rdma_send(rdma, &ctxt->sc_send_wr); ctxt->sc_sges[0].length = ctxt->sc_hdrbuf.len;
if (ret) { return svc_rdma_send(rdma, &ctxt->sc_send_wr);
svc_rdma_send_ctxt_put(rdma, ctxt);
return ret;
}
return 0;
} }
/** /**
@ -825,14 +858,14 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
struct svcxprt_rdma *rdma = struct svcxprt_rdma *rdma =
container_of(xprt, struct svcxprt_rdma, sc_xprt); container_of(xprt, struct svcxprt_rdma, sc_xprt);
struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt; struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt;
__be32 *p, *rdma_argp, *rdma_resp, *wr_lst, *rp_ch; __be32 *rdma_argp = rctxt->rc_recv_buf;
__be32 *wr_lst = rctxt->rc_write_list;
__be32 *rp_ch = rctxt->rc_reply_chunk;
struct xdr_buf *xdr = &rqstp->rq_res; struct xdr_buf *xdr = &rqstp->rq_res;
struct svc_rdma_send_ctxt *sctxt; struct svc_rdma_send_ctxt *sctxt;
__be32 *p;
int ret; int ret;
rdma_argp = rctxt->rc_recv_buf;
svc_rdma_get_write_arrays(rdma_argp, &wr_lst, &rp_ch);
/* Create the RDMA response header. xprt->xpt_mutex, /* Create the RDMA response header. xprt->xpt_mutex,
* acquired in svc_send(), serializes RPC replies. The * acquired in svc_send(), serializes RPC replies. The
* code path below that inserts the credit grant value * code path below that inserts the credit grant value
@ -843,36 +876,52 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
sctxt = svc_rdma_send_ctxt_get(rdma); sctxt = svc_rdma_send_ctxt_get(rdma);
if (!sctxt) if (!sctxt)
goto err0; goto err0;
rdma_resp = sctxt->sc_xprt_buf;
p = rdma_resp; p = xdr_reserve_space(&sctxt->sc_stream,
rpcrdma_fixed_maxsz * sizeof(*p));
if (!p)
goto err0;
*p++ = *rdma_argp; *p++ = *rdma_argp;
*p++ = *(rdma_argp + 1); *p++ = *(rdma_argp + 1);
*p++ = rdma->sc_fc_credits; *p++ = rdma->sc_fc_credits;
*p++ = rp_ch ? rdma_nomsg : rdma_msg; *p = rp_ch ? rdma_nomsg : rdma_msg;
/* Start with empty chunks */
*p++ = xdr_zero;
*p++ = xdr_zero;
*p = xdr_zero;
if (svc_rdma_encode_read_list(sctxt) < 0)
goto err0;
if (wr_lst) { if (wr_lst) {
/* XXX: Presume the client sent only one Write chunk */ /* XXX: Presume the client sent only one Write chunk */
ret = svc_rdma_send_write_chunk(rdma, wr_lst, xdr); unsigned long offset;
unsigned int length;
if (rctxt->rc_read_payload_length) {
offset = rctxt->rc_read_payload_offset;
length = rctxt->rc_read_payload_length;
} else {
offset = xdr->head[0].iov_len;
length = xdr->page_len;
}
ret = svc_rdma_send_write_chunk(rdma, wr_lst, xdr, offset,
length);
if (ret < 0) if (ret < 0)
goto err2; goto err2;
svc_rdma_xdr_encode_write_list(rdma_resp, wr_lst, ret); if (svc_rdma_encode_write_list(rctxt, sctxt, length) < 0)
goto err0;
} else {
if (xdr_stream_encode_item_absent(&sctxt->sc_stream) < 0)
goto err0;
} }
if (rp_ch) { if (rp_ch) {
ret = svc_rdma_send_reply_chunk(rdma, rp_ch, wr_lst, xdr); ret = svc_rdma_send_reply_chunk(rdma, rctxt, &rqstp->rq_res);
if (ret < 0) if (ret < 0)
goto err2; goto err2;
svc_rdma_xdr_encode_reply_chunk(rdma_resp, rp_ch, ret); if (svc_rdma_encode_reply_chunk(rctxt, sctxt, ret) < 0)
goto err0;
} else {
if (xdr_stream_encode_item_absent(&sctxt->sc_stream) < 0)
goto err0;
} }
svc_rdma_sync_reply_hdr(rdma, sctxt, svc_rdma_reply_hdr_len(rdma_resp)); ret = svc_rdma_send_reply_msg(rdma, sctxt, rctxt, rqstp);
ret = svc_rdma_send_reply_msg(rdma, sctxt, rctxt, rqstp,
wr_lst, rp_ch);
if (ret < 0) if (ret < 0)
goto err1; goto err1;
ret = 0; ret = 0;
@ -900,3 +949,30 @@ out:
ret = -ENOTCONN; ret = -ENOTCONN;
goto out; goto out;
} }
/**
* svc_rdma_read_payload - special processing for a READ payload
* @rqstp: svc_rqst to operate on
* @offset: payload's byte offset in @xdr
* @length: size of payload, in bytes
*
* Returns zero on success.
*
* For the moment, just record the xdr_buf location of the READ
* payload. svc_rdma_sendto will use that location later when
* we actually send the payload.
*/
int svc_rdma_read_payload(struct svc_rqst *rqstp, unsigned int offset,
unsigned int length)
{
struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt;
/* XXX: Just one READ payload slot for now, since our
* transport implementation currently supports only one
* Write chunk.
*/
rctxt->rc_read_payload_offset = offset;
rctxt->rc_read_payload_length = length;
return 0;
}

View File

@ -82,6 +82,7 @@ static const struct svc_xprt_ops svc_rdma_ops = {
.xpo_create = svc_rdma_create, .xpo_create = svc_rdma_create,
.xpo_recvfrom = svc_rdma_recvfrom, .xpo_recvfrom = svc_rdma_recvfrom,
.xpo_sendto = svc_rdma_sendto, .xpo_sendto = svc_rdma_sendto,
.xpo_read_payload = svc_rdma_read_payload,
.xpo_release_rqst = svc_rdma_release_rqst, .xpo_release_rqst = svc_rdma_release_rqst,
.xpo_detach = svc_rdma_detach, .xpo_detach = svc_rdma_detach,
.xpo_free = svc_rdma_free, .xpo_free = svc_rdma_free,
@ -240,10 +241,6 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id,
static int rdma_listen_handler(struct rdma_cm_id *cma_id, static int rdma_listen_handler(struct rdma_cm_id *cma_id,
struct rdma_cm_event *event) struct rdma_cm_event *event)
{ {
struct sockaddr *sap = (struct sockaddr *)&cma_id->route.addr.src_addr;
trace_svcrdma_cm_event(event, sap);
switch (event->event) { switch (event->event) {
case RDMA_CM_EVENT_CONNECT_REQUEST: case RDMA_CM_EVENT_CONNECT_REQUEST:
dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, " dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, "
@ -265,12 +262,9 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id,
static int rdma_cma_handler(struct rdma_cm_id *cma_id, static int rdma_cma_handler(struct rdma_cm_id *cma_id,
struct rdma_cm_event *event) struct rdma_cm_event *event)
{ {
struct sockaddr *sap = (struct sockaddr *)&cma_id->route.addr.dst_addr;
struct svcxprt_rdma *rdma = cma_id->context; struct svcxprt_rdma *rdma = cma_id->context;
struct svc_xprt *xprt = &rdma->sc_xprt; struct svc_xprt *xprt = &rdma->sc_xprt;
trace_svcrdma_cm_event(event, sap);
switch (event->event) { switch (event->event) {
case RDMA_CM_EVENT_ESTABLISHED: case RDMA_CM_EVENT_ESTABLISHED:
/* Accept complete */ /* Accept complete */

View File

@ -54,6 +54,7 @@
#include <trace/events/sunrpc.h> #include <trace/events/sunrpc.h>
#include "socklib.h"
#include "sunrpc.h" #include "sunrpc.h"
static void xs_close(struct rpc_xprt *xprt); static void xs_close(struct rpc_xprt *xprt);
@ -749,125 +750,6 @@ xs_stream_start_connect(struct sock_xprt *transport)
#define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL) #define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL)
static int xs_sendmsg(struct socket *sock, struct msghdr *msg, size_t seek)
{
if (seek)
iov_iter_advance(&msg->msg_iter, seek);
return sock_sendmsg(sock, msg);
}
static int xs_send_kvec(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t seek)
{
iov_iter_kvec(&msg->msg_iter, WRITE, vec, 1, vec->iov_len);
return xs_sendmsg(sock, msg, seek);
}
static int xs_send_pagedata(struct socket *sock, struct msghdr *msg, struct xdr_buf *xdr, size_t base)
{
int err;
err = xdr_alloc_bvec(xdr, GFP_KERNEL);
if (err < 0)
return err;
iov_iter_bvec(&msg->msg_iter, WRITE, xdr->bvec,
xdr_buf_pagecount(xdr),
xdr->page_len + xdr->page_base);
return xs_sendmsg(sock, msg, base + xdr->page_base);
}
#define xs_record_marker_len() sizeof(rpc_fraghdr)
/*
 * Send a record marker followed by one kvec in a single sendmsg call.
 *
 * Intended for the common case:
 *  - the transport is a stream transport,
 *  - transmission starts at byte 0 of the message, and
 *  - the whole message lives in the head iovec of the xdr_buf.
 *
 * @marker is passed by value; its local copy becomes the first segment
 * and *@vec the second. @base bytes of the combined buffer are skipped
 * by xs_sendmsg(), whose result is returned.
 */
static int xs_send_rm_and_kvec(struct socket *sock, struct msghdr *msg,
			       rpc_fraghdr marker, struct kvec *vec, size_t base)
{
	struct kvec iov[2];

	iov[0].iov_base = &marker;
	iov[0].iov_len = sizeof(marker);
	iov[1] = *vec;

	iov_iter_kvec(&msg->msg_iter, WRITE, iov, 2,
		      iov[0].iov_len + iov[1].iov_len);

	return xs_sendmsg(sock, msg, base);
}
/**
 * xs_sendpages - transmit an xdr_buf (head, pages, tail) on a socket
 * @sock: socket to send on
 * @addr: destination address, stored in msg_name (UDP only)
 * @addrlen: length of @addr, stored in msg_namelen (UDP only)
 * @xdr: buffer holding the request
 * @base: offset into the (record marker + buffer) at which to start
 * @rm: stream record marker; zero means no marker is sent
 * @sent_p: incremented by the number of bytes successfully queued
 *
 * The head, page and tail sections are sent in that order. MSG_MORE
 * stays set on every send except the one that finishes the message.
 * Sending stops at the first short or failed send.
 *
 * Returns -ENOTSOCK if @sock is NULL, a negative value from the failing
 * send, or zero otherwise. Note that *@sent_p is only added to, never
 * reset, so the caller chooses its starting value.
 */
static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, rpc_fraghdr rm, int *sent_p)
{
	struct msghdr msg = {
		.msg_name	= addr,
		.msg_namelen	= addrlen,
		.msg_flags	= XS_SENDMSG_FLAGS | MSG_MORE,
	};
	unsigned int marker_len = 0;
	unsigned int left;
	unsigned int head_len;
	int rc = 0;

	if (rm)
		marker_len = sizeof(rm);

	/* Bytes still to be sent, counting the record marker if any. */
	left = marker_len + xdr->len - base;

	if (unlikely(!sock))
		return -ENOTSOCK;

	/* Section 1: optional record marker plus the head iovec. */
	head_len = xdr->head[0].iov_len + marker_len;
	if (base >= head_len) {
		base -= head_len;
	} else {
		unsigned int chunk = head_len - base;

		left -= chunk;
		if (left == 0)
			msg.msg_flags &= ~MSG_MORE;

		if (marker_len != 0)
			rc = xs_send_rm_and_kvec(sock, &msg, rm,
						 &xdr->head[0], base);
		else
			rc = xs_send_kvec(sock, &msg, &xdr->head[0], base);

		/* Done, short send, or error: let "out" account for it. */
		if (left == 0 || rc != chunk)
			goto out;

		*sent_p += rc;
		base = 0;
	}

	/* Section 2: the page data. */
	if (base >= xdr->page_len) {
		base -= xdr->page_len;
	} else {
		unsigned int chunk = xdr->page_len - base;

		left -= chunk;
		if (left == 0)
			msg.msg_flags &= ~MSG_MORE;

		rc = xs_send_pagedata(sock, &msg, xdr, base);

		if (left == 0 || rc != chunk)
			goto out;

		*sent_p += rc;
		base = 0;
	}

	/* Section 3: the tail iovec, always the final send. */
	if (base >= xdr->tail[0].iov_len)
		return 0;

	msg.msg_flags &= ~MSG_MORE;
	rc = xs_send_kvec(sock, &msg, &xdr->tail[0], base);

out:
	/* A positive result is a byte count: record it, report success. */
	if (rc > 0) {
		*sent_p += rc;
		rc = 0;
	}
	return rc;
}
/** /**
* xs_nospace - handle transmit was incomplete * xs_nospace - handle transmit was incomplete
* @req: pointer to RPC request * @req: pointer to RPC request
@ -959,8 +841,11 @@ static int xs_local_send_request(struct rpc_rqst *req)
struct xdr_buf *xdr = &req->rq_snd_buf; struct xdr_buf *xdr = &req->rq_snd_buf;
rpc_fraghdr rm = xs_stream_record_marker(xdr); rpc_fraghdr rm = xs_stream_record_marker(xdr);
unsigned int msglen = rm ? req->rq_slen + sizeof(rm) : req->rq_slen; unsigned int msglen = rm ? req->rq_slen + sizeof(rm) : req->rq_slen;
struct msghdr msg = {
.msg_flags = XS_SENDMSG_FLAGS,
};
unsigned int uninitialized_var(sent);
int status; int status;
int sent = 0;
/* Close the stream if the previous transmission was incomplete */ /* Close the stream if the previous transmission was incomplete */
if (xs_send_request_was_aborted(transport, req)) { if (xs_send_request_was_aborted(transport, req)) {
@ -972,8 +857,8 @@ static int xs_local_send_request(struct rpc_rqst *req)
req->rq_svec->iov_base, req->rq_svec->iov_len); req->rq_svec->iov_base, req->rq_svec->iov_len);
req->rq_xtime = ktime_get(); req->rq_xtime = ktime_get();
status = xs_sendpages(transport->sock, NULL, 0, xdr, status = xprt_sock_sendmsg(transport->sock, &msg, xdr,
transport->xmit.offset, rm, &sent); transport->xmit.offset, rm, &sent);
dprintk("RPC: %s(%u) = %d\n", dprintk("RPC: %s(%u) = %d\n",
__func__, xdr->len - transport->xmit.offset, status); __func__, xdr->len - transport->xmit.offset, status);
@ -1025,7 +910,12 @@ static int xs_udp_send_request(struct rpc_rqst *req)
struct rpc_xprt *xprt = req->rq_xprt; struct rpc_xprt *xprt = req->rq_xprt;
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct xdr_buf *xdr = &req->rq_snd_buf; struct xdr_buf *xdr = &req->rq_snd_buf;
int sent = 0; struct msghdr msg = {
.msg_name = xs_addr(xprt),
.msg_namelen = xprt->addrlen,
.msg_flags = XS_SENDMSG_FLAGS,
};
unsigned int uninitialized_var(sent);
int status; int status;
xs_pktdump("packet data:", xs_pktdump("packet data:",
@ -1039,8 +929,7 @@ static int xs_udp_send_request(struct rpc_rqst *req)
return -EBADSLT; return -EBADSLT;
req->rq_xtime = ktime_get(); req->rq_xtime = ktime_get();
status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen, status = xprt_sock_sendmsg(transport->sock, &msg, xdr, 0, 0, &sent);
xdr, 0, 0, &sent);
dprintk("RPC: xs_udp_send_request(%u) = %d\n", dprintk("RPC: xs_udp_send_request(%u) = %d\n",
xdr->len, status); xdr->len, status);
@ -1106,9 +995,12 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
struct xdr_buf *xdr = &req->rq_snd_buf; struct xdr_buf *xdr = &req->rq_snd_buf;
rpc_fraghdr rm = xs_stream_record_marker(xdr); rpc_fraghdr rm = xs_stream_record_marker(xdr);
unsigned int msglen = rm ? req->rq_slen + sizeof(rm) : req->rq_slen; unsigned int msglen = rm ? req->rq_slen + sizeof(rm) : req->rq_slen;
struct msghdr msg = {
.msg_flags = XS_SENDMSG_FLAGS,
};
bool vm_wait = false; bool vm_wait = false;
unsigned int uninitialized_var(sent);
int status; int status;
int sent;
/* Close the stream if the previous transmission was incomplete */ /* Close the stream if the previous transmission was incomplete */
if (xs_send_request_was_aborted(transport, req)) { if (xs_send_request_was_aborted(transport, req)) {
@ -1129,9 +1021,8 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
* called sendmsg(). */ * called sendmsg(). */
req->rq_xtime = ktime_get(); req->rq_xtime = ktime_get();
while (1) { while (1) {
sent = 0; status = xprt_sock_sendmsg(transport->sock, &msg, xdr,
status = xs_sendpages(transport->sock, NULL, 0, xdr, transport->xmit.offset, rm, &sent);
transport->xmit.offset, rm, &sent);
dprintk("RPC: xs_tcp_send_request(%u) = %d\n", dprintk("RPC: xs_tcp_send_request(%u) = %d\n",
xdr->len - transport->xmit.offset, status); xdr->len - transport->xmit.offset, status);
@ -2636,46 +2527,25 @@ static void bc_free(struct rpc_task *task)
free_page((unsigned long)buf); free_page((unsigned long)buf);
} }
/*
* Use the svc_sock to send the callback. Must be called with svsk->sk_mutex
* held. Borrows heavily from svc_tcp_sendto and xs_tcp_send_request.
*/
static int bc_sendto(struct rpc_rqst *req) static int bc_sendto(struct rpc_rqst *req)
{ {
int len; struct xdr_buf *xdr = &req->rq_snd_buf;
struct xdr_buf *xbufp = &req->rq_snd_buf;
struct sock_xprt *transport = struct sock_xprt *transport =
container_of(req->rq_xprt, struct sock_xprt, xprt); container_of(req->rq_xprt, struct sock_xprt, xprt);
unsigned long headoff;
unsigned long tailoff;
struct page *tailpage;
struct msghdr msg = { struct msghdr msg = {
.msg_flags = MSG_MORE .msg_flags = 0,
}; };
rpc_fraghdr marker = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | rpc_fraghdr marker = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT |
(u32)xbufp->len); (u32)xdr->len);
struct kvec iov = { unsigned int sent = 0;
.iov_base = &marker, int err;
.iov_len = sizeof(marker),
};
req->rq_xtime = ktime_get(); req->rq_xtime = ktime_get();
err = xprt_sock_sendmsg(transport->sock, &msg, xdr, 0, marker, &sent);
len = kernel_sendmsg(transport->sock, &msg, &iov, 1, iov.iov_len); xdr_free_bvec(xdr);
if (len != iov.iov_len) if (err < 0 || sent != (xdr->len + sizeof(marker)))
return -EAGAIN; return -EAGAIN;
return sent;
tailpage = NULL;
if (xbufp->tail[0].iov_len)
tailpage = virt_to_page(xbufp->tail[0].iov_base);
tailoff = (unsigned long)xbufp->tail[0].iov_base & ~PAGE_MASK;
headoff = (unsigned long)xbufp->head[0].iov_base & ~PAGE_MASK;
len = svc_send_common(transport->sock, xbufp,
virt_to_page(xbufp->head[0].iov_base), headoff,
tailpage, tailoff);
if (len != xbufp->len)
return -EAGAIN;
return len;
} }
/* /*