
Merge tag 'nfs-for-5.5-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs

Pull NFS client updates from Trond Myklebust:
 "Highlights include:

  Features:

   - NFSv4.2 now supports cross device offloaded copy (i.e. offloaded
     copy of a file from one source server to a different target
     server).

   - New RDMA tracepoints for debugging congestion control and Local
     Invalidate WRs.

  Bugfixes and cleanups:

   - Drop the NFSv4.1 session slot if nfs4_delegreturn_prepare waits for
     layoutreturn

   - Handle bad/dead sessions correctly in nfs41_sequence_process()

   - Various bugfixes to the delegation return operation.

   - Various bugfixes pertaining to delegations that have been revoked.

   - Cleanups to the NFS timespec code to avoid unnecessary conversions
     between timespec and timespec64.

   - Fix unstable RDMA connections after a reconnect

   - Close race between waking an RDMA sender and posting a receive

   - Wake pending RDMA tasks if connection fails

   - Fix MR list corruption, and clean up MR usage

   - Fix another RPCSEC_GSS issue with MIC buffer space"
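
From userspace, the new cross-device offload surfaces through
copy_file_range(2): when source and destination both sit on NFSv4.2
mounts, the client can now ask the two servers to move the data
directly instead of routing it through the client. A minimal sketch of
the call pattern (the mount paths are hypothetical):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
    /* Source and destination live on two different NFS servers. */
    int in = open("/mnt/server-a/big.img", O_RDONLY);
    int out = open("/mnt/server-b/big.img", O_WRONLY | O_CREAT, 0644);
    struct stat st;

    if (in < 0 || out < 0 || fstat(in, &st) < 0) {
        perror("setup");
        return 1;
    }
    /* copy_file_range() may copy less than asked; loop until done. */
    for (off_t left = st.st_size; left > 0; ) {
        ssize_t n = copy_file_range(in, NULL, out, NULL, left, 0);
        if (n <= 0) {
            perror("copy_file_range");
            return 1;
        }
        left -= n;
    }
    return 0;
}

Whether a given call goes synchronous, asynchronous, or falls back to
an ordinary read/write copy is decided by the client heuristics in the
fs/nfs/nfs4file.c hunks below.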

* tag 'nfs-for-5.5-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (79 commits)
  SUNRPC: Capture completion of all RPC tasks
  SUNRPC: Fix another issue with MIC buffer space
  NFS4: Trace lock reclaims
  NFS4: Trace state recovery operation
  NFSv4.2 fix memory leak in nfs42_ssc_open
  NFSv4.2 fix kfree in __nfs42_copy_file_range
  NFS: remove duplicated include from nfs4file.c
  NFSv4: Make _nfs42_proc_copy_notify() static
  NFS: Fallocate should use the nfs4_fattr_bitmap
  NFS: Return -ETXTBSY when attempting to write to a swapfile
  fs: nfs: sysfs: Remove NULL check before kfree
  NFS: remove unneeded semicolon
  NFSv4: add declaration of current_stateid
  NFSv4.x: Drop the slot if nfs4_delegreturn_prepare waits for layoutreturn
  NFSv4.x: Handle bad/dead sessions correctly in nfs41_sequence_process()
  nfsv4: Move NFSPROC4_CLNT_COPY_NOTIFY to end of list
  SUNRPC: Avoid RPC delays when exiting suspend
  NFS: Add a tracepoint in nfs_fh_to_dentry()
  NFSv4: Don't retry the GETATTR on old stateid in nfs4_delegreturn_done()
  NFSv4: Handle NFS4ERR_OLD_STATEID in delegreturn
  ...
Linus Torvalds 2019-12-07 16:50:55 -08:00
commit fb9bf40cf0
49 changed files with 1773 additions and 639 deletions


@@ -464,7 +464,8 @@ nlm_bind_host(struct nlm_host *host)
.version = host->h_version,
.authflavor = RPC_AUTH_UNIX,
.flags = (RPC_CLNT_CREATE_NOPING |
RPC_CLNT_CREATE_AUTOBIND),
RPC_CLNT_CREATE_AUTOBIND |
RPC_CLNT_CREATE_REUSEPORT),
.cred = host->h_cred,
};
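
RPC_CLNT_CREATE_REUSEPORT (new in this series) makes the RPC client
bind a reconnected TCP socket to the same source port it used before.
lockd wants this because NLM/NSM servers, like other pre-session NFS
services, commonly identify a client and its duplicate request cache
entries by the (address, port) pair; NFSv4.1+ sessions carry their own
replay protection, so they leave the flag off (see the nfs4client.c
hunk below). A rough userspace analogy, with a hypothetical
connect_from_port() helper:

#include <arpa/inet.h>
#include <stdint.h>
#include <string.h>
#include <sys/socket.h>

/* Bind to a fixed source port before connect(), so every reconnect
 * presents the same (address, port) identity to the server. */
int connect_from_port(int sock, uint16_t src_port,
                      const struct sockaddr_in *dst)
{
    struct sockaddr_in src;
    int one = 1;

    setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
    memset(&src, 0, sizeof(src));
    src.sin_family = AF_INET;
    src.sin_port = htons(src_port);
    src.sin_addr.s_addr = htonl(INADDR_ANY);
    if (bind(sock, (const struct sockaddr *)&src, sizeof(src)) < 0)
        return -1;
    return connect(sock, (const struct sockaddr *)dst, sizeof(*dst));
}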


@@ -72,8 +72,8 @@ struct cb_getattrres {
uint32_t bitmap[2];
uint64_t size;
uint64_t change_attr;
struct timespec ctime;
struct timespec mtime;
struct timespec64 ctime;
struct timespec64 mtime;
};
struct cb_recallargs {


@@ -26,7 +26,6 @@ __be32 nfs4_callback_getattr(void *argp, void *resp,
struct cb_getattrargs *args = argp;
struct cb_getattrres *res = resp;
struct nfs_delegation *delegation;
struct nfs_inode *nfsi;
struct inode *inode;
res->status = htonl(NFS4ERR_OP_NOT_IN_SESSION);
@@ -47,17 +46,16 @@ __be32 nfs4_callback_getattr(void *argp, void *resp,
-ntohl(res->status));
goto out;
}
nfsi = NFS_I(inode);
rcu_read_lock();
delegation = rcu_dereference(nfsi->delegation);
delegation = nfs4_get_valid_delegation(inode);
if (delegation == NULL || (delegation->type & FMODE_WRITE) == 0)
goto out_iput;
res->size = i_size_read(inode);
res->change_attr = delegation->change_attr;
if (nfs_have_writebacks(inode))
res->change_attr++;
res->ctime = timespec64_to_timespec(inode->i_ctime);
res->mtime = timespec64_to_timespec(inode->i_mtime);
res->ctime = inode->i_ctime;
res->mtime = inode->i_mtime;
res->bitmap[0] = (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE) &
args->bitmap[0];
res->bitmap[1] = (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY) &


@@ -627,7 +627,7 @@ static __be32 encode_attr_size(struct xdr_stream *xdr, const uint32_t *bitmap, u
return 0;
}
static __be32 encode_attr_time(struct xdr_stream *xdr, const struct timespec *time)
static __be32 encode_attr_time(struct xdr_stream *xdr, const struct timespec64 *time)
{
__be32 *p;
@@ -639,14 +639,14 @@ static __be32 encode_attr_time(struct xdr_stream *xdr, const struct timespec *ti
return 0;
}
static __be32 encode_attr_ctime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec *time)
static __be32 encode_attr_ctime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec64 *time)
{
if (!(bitmap[1] & FATTR4_WORD1_TIME_METADATA))
return 0;
return encode_attr_time(xdr,time);
}
static __be32 encode_attr_mtime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec *time)
static __be32 encode_attr_mtime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec64 *time)
{
if (!(bitmap[1] & FATTR4_WORD1_TIME_MODIFY))
return 0;


@@ -312,6 +312,12 @@ again:
/* Match nfsv4 minorversion */
if (clp->cl_minorversion != data->minorversion)
continue;
/* Match request for a dedicated DS */
if (test_bit(NFS_CS_DS, &data->init_flags) !=
test_bit(NFS_CS_DS, &clp->cl_flags))
continue;
/* Match the full socket address */
if (!rpc_cmp_addr_port(sap, clap))
/* Match all xprt_switch full socket addresses */
@@ -515,6 +521,10 @@ int nfs_create_rpc_client(struct nfs_client *clp,
args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
if (test_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags))
args.flags |= RPC_CLNT_CREATE_INFINITE_SLOTS;
if (test_bit(NFS_CS_NOPING, &clp->cl_flags))
args.flags |= RPC_CLNT_CREATE_NOPING;
if (test_bit(NFS_CS_REUSEPORT, &clp->cl_flags))
args.flags |= RPC_CLNT_CREATE_REUSEPORT;
if (!IS_ERR(clp->cl_rpcclient))
return 0;
@@ -662,6 +672,7 @@ static int nfs_init_server(struct nfs_server *server,
.timeparms = &timeparms,
.cred = server->cred,
.nconnect = data->nfs_server.nconnect,
.init_flags = (1UL << NFS_CS_REUSEPORT),
};
struct nfs_client *clp;
int error;


@@ -199,7 +199,7 @@ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred,
delegation = rcu_dereference(NFS_I(inode)->delegation);
if (delegation != NULL) {
spin_lock(&delegation->lock);
if (delegation->inode != NULL) {
if (nfs4_is_valid_delegation(delegation, 0)) {
nfs4_stateid_copy(&delegation->stateid, stateid);
delegation->type = type;
delegation->pagemod_limit = pagemod_limit;
@@ -229,7 +229,6 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *
delegation->cred,
&delegation->stateid,
issync);
nfs_free_delegation(delegation);
return res;
}
@@ -298,7 +297,10 @@ nfs_detach_delegation_locked(struct nfs_inode *nfsi,
return NULL;
spin_lock(&delegation->lock);
set_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
if (!delegation->inode) {
spin_unlock(&delegation->lock);
return NULL;
}
list_del_rcu(&delegation->super_list);
delegation->inode = NULL;
rcu_assign_pointer(nfsi->delegation, NULL);
@@ -325,10 +327,12 @@ nfs_inode_detach_delegation(struct inode *inode)
struct nfs_server *server = NFS_SERVER(inode);
struct nfs_delegation *delegation;
delegation = nfs_start_delegation_return(nfsi);
if (delegation == NULL)
return NULL;
return nfs_detach_delegation(nfsi, delegation, server);
rcu_read_lock();
delegation = rcu_dereference(nfsi->delegation);
if (delegation != NULL)
delegation = nfs_detach_delegation(nfsi, delegation, server);
rcu_read_unlock();
return delegation;
}
static void
@@ -339,6 +343,7 @@ nfs_update_inplace_delegation(struct nfs_delegation *delegation,
delegation->stateid.seqid = update->stateid.seqid;
smp_wmb();
delegation->type = update->type;
clear_bit(NFS_DELEGATION_REVOKED, &delegation->flags);
}
}
@@ -379,14 +384,18 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred,
spin_lock(&clp->cl_lock);
old_delegation = rcu_dereference_protected(nfsi->delegation,
lockdep_is_held(&clp->cl_lock));
if (old_delegation != NULL) {
/* Is this an update of the existing delegation? */
if (nfs4_stateid_match_other(&old_delegation->stateid,
&delegation->stateid)) {
nfs_update_inplace_delegation(old_delegation,
delegation);
goto out;
}
if (old_delegation == NULL)
goto add_new;
/* Is this an update of the existing delegation? */
if (nfs4_stateid_match_other(&old_delegation->stateid,
&delegation->stateid)) {
spin_lock(&old_delegation->lock);
nfs_update_inplace_delegation(old_delegation,
delegation);
spin_unlock(&old_delegation->lock);
goto out;
}
if (!test_bit(NFS_DELEGATION_REVOKED, &old_delegation->flags)) {
/*
* Deal with broken servers that hand out two
* delegations for the same file.
@@ -405,11 +414,11 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred,
if (test_and_set_bit(NFS_DELEGATION_RETURNING,
&old_delegation->flags))
goto out;
freeme = nfs_detach_delegation_locked(nfsi,
old_delegation, clp);
if (freeme == NULL)
goto out;
}
freeme = nfs_detach_delegation_locked(nfsi, old_delegation, clp);
if (freeme == NULL)
goto out;
add_new:
list_add_tail_rcu(&delegation->super_list, &server->delegations);
rcu_assign_pointer(nfsi->delegation, delegation);
delegation = NULL;
@@ -424,8 +433,10 @@ out:
spin_unlock(&clp->cl_lock);
if (delegation != NULL)
nfs_free_delegation(delegation);
if (freeme != NULL)
if (freeme != NULL) {
nfs_do_return_delegation(inode, freeme, 0);
nfs_free_delegation(freeme);
}
return status;
}
@@ -435,7 +446,6 @@ out:
static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation *delegation, int issync)
{
struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
struct nfs_inode *nfsi = NFS_I(inode);
int err = 0;
if (delegation == NULL)
@@ -457,8 +467,6 @@ static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation
nfs_abort_delegation_return(delegation, clp);
goto out;
}
if (!nfs_detach_delegation(nfsi, delegation, NFS_SERVER(inode)))
goto out;
err = nfs_do_return_delegation(inode, delegation, issync);
out:
@@ -469,8 +477,6 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation)
{
bool ret = false;
if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
goto out;
if (test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags))
ret = true;
if (test_and_clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags) && !ret) {
@@ -482,7 +488,10 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation)
ret = true;
spin_unlock(&delegation->lock);
}
out:
if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags) ||
test_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
ret = false;
return ret;
}
@@ -585,19 +594,23 @@ restart:
}
/**
* nfs_inode_return_delegation_noreclaim - return delegation, don't reclaim opens
* nfs_inode_evict_delegation - return delegation, don't reclaim opens
* @inode: inode to process
*
* Does not protect against delegation reclaims, therefore really only safe
* to be called from nfs4_clear_inode().
* to be called from nfs4_clear_inode(). Guaranteed to always free
* the delegation structure.
*/
void nfs_inode_return_delegation_noreclaim(struct inode *inode)
void nfs_inode_evict_delegation(struct inode *inode)
{
struct nfs_delegation *delegation;
delegation = nfs_inode_detach_delegation(inode);
if (delegation != NULL)
if (delegation != NULL) {
set_bit(NFS_DELEGATION_INODE_FREEING, &delegation->flags);
nfs_do_return_delegation(inode, delegation, 1);
nfs_free_delegation(delegation);
}
}
/**
@@ -633,10 +646,18 @@ int nfs4_inode_return_delegation(struct inode *inode)
*/
int nfs4_inode_make_writeable(struct inode *inode)
{
if (!nfs4_has_session(NFS_SERVER(inode)->nfs_client) ||
!nfs4_check_delegation(inode, FMODE_WRITE))
return nfs4_inode_return_delegation(inode);
return 0;
struct nfs_delegation *delegation;
rcu_read_lock();
delegation = nfs4_get_valid_delegation(inode);
if (delegation == NULL ||
(nfs4_has_session(NFS_SERVER(inode)->nfs_client) &&
(delegation->type & FMODE_WRITE))) {
rcu_read_unlock();
return 0;
}
rcu_read_unlock();
return nfs4_inode_return_delegation(inode);
}
static void nfs_mark_return_if_closed_delegation(struct nfs_server *server,
@@ -744,10 +765,9 @@ static void nfs_mark_delegation_revoked(struct nfs_server *server,
{
set_bit(NFS_DELEGATION_REVOKED, &delegation->flags);
delegation->stateid.type = NFS4_INVALID_STATEID_TYPE;
nfs_mark_return_delegation(server, delegation);
}
static bool nfs_revoke_delegation(struct inode *inode,
static void nfs_revoke_delegation(struct inode *inode,
const nfs4_stateid *stateid)
{
struct nfs_delegation *delegation;
@@ -761,30 +781,70 @@ static bool nfs_revoke_delegation(struct inode *inode,
if (stateid == NULL) {
nfs4_stateid_copy(&tmp, &delegation->stateid);
stateid = &tmp;
} else if (!nfs4_stateid_match(stateid, &delegation->stateid))
goto out;
} else {
if (!nfs4_stateid_match_other(stateid, &delegation->stateid))
goto out;
spin_lock(&delegation->lock);
if (stateid->seqid) {
if (nfs4_stateid_is_newer(&delegation->stateid, stateid)) {
spin_unlock(&delegation->lock);
goto out;
}
delegation->stateid.seqid = stateid->seqid;
}
spin_unlock(&delegation->lock);
}
nfs_mark_delegation_revoked(NFS_SERVER(inode), delegation);
ret = true;
out:
rcu_read_unlock();
if (ret)
nfs_inode_find_state_and_recover(inode, stateid);
return ret;
}
void nfs_remove_bad_delegation(struct inode *inode,
const nfs4_stateid *stateid)
{
struct nfs_delegation *delegation;
if (!nfs_revoke_delegation(inode, stateid))
return;
delegation = nfs_inode_detach_delegation(inode);
if (delegation)
nfs_free_delegation(delegation);
nfs_revoke_delegation(inode, stateid);
}
EXPORT_SYMBOL_GPL(nfs_remove_bad_delegation);
void nfs_delegation_mark_returned(struct inode *inode,
const nfs4_stateid *stateid)
{
struct nfs_delegation *delegation;
if (!inode)
return;
rcu_read_lock();
delegation = rcu_dereference(NFS_I(inode)->delegation);
if (!delegation)
goto out_rcu_unlock;
spin_lock(&delegation->lock);
if (!nfs4_stateid_match_other(stateid, &delegation->stateid))
goto out_spin_unlock;
if (stateid->seqid) {
/* If delegation->stateid is newer, don't mark as returned */
if (nfs4_stateid_is_newer(&delegation->stateid, stateid))
goto out_clear_returning;
if (delegation->stateid.seqid != stateid->seqid)
delegation->stateid.seqid = stateid->seqid;
}
nfs_mark_delegation_revoked(NFS_SERVER(inode), delegation);
out_clear_returning:
clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
out_spin_unlock:
spin_unlock(&delegation->lock);
out_rcu_unlock:
rcu_read_unlock();
nfs_inode_find_state_and_recover(inode, stateid);
}
/**
* nfs_expire_unused_delegation_types
* @clp: client to process
@@ -840,7 +900,7 @@ int nfs_async_inode_return_delegation(struct inode *inode,
struct nfs_delegation *delegation;
rcu_read_lock();
delegation = rcu_dereference(NFS_I(inode)->delegation);
delegation = nfs4_get_valid_delegation(inode);
if (delegation == NULL)
goto out_enoent;
if (stateid != NULL &&
@@ -866,6 +926,7 @@ nfs_delegation_find_inode_server(struct nfs_server *server,
list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
spin_lock(&delegation->lock);
if (delegation->inode != NULL &&
!test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) &&
nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) {
freeme = igrab(delegation->inode);
if (freeme && nfs_sb_active(freeme->i_sb))
@@ -1140,7 +1201,8 @@ void nfs_inode_find_delegation_state_and_recover(struct inode *inode,
rcu_read_lock();
delegation = rcu_dereference(NFS_I(inode)->delegation);
if (delegation &&
nfs4_stateid_match_other(&delegation->stateid, stateid)) {
nfs4_stateid_match_or_older(&delegation->stateid, stateid) &&
!test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) {
nfs_mark_test_expired_delegation(NFS_SERVER(inode), delegation);
found = true;
}
@@ -1189,7 +1251,9 @@ bool nfs4_refresh_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
rcu_read_lock();
delegation = rcu_dereference(NFS_I(inode)->delegation);
if (delegation != NULL &&
nfs4_stateid_match_other(dst, &delegation->stateid)) {
nfs4_stateid_match_other(dst, &delegation->stateid) &&
nfs4_stateid_is_newer(&delegation->stateid, dst) &&
!test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) {
dst->seqid = delegation->stateid.seqid;
ret = true;
}
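
Several hunks above also switch from nfs4_stateid_match() to
nfs4_stateid_match_other(). The distinction follows from the RFC 5661
stateid layout: a 32-bit seqid the server bumps on every state change,
plus a 96-bit "other" field naming the state itself. The revocation
and return paths need to recognise a delegation even after the server
has bumped its seqid, hence the _other variants. A reduced sketch (not
the kernel's actual types):

#include <stdbool.h>
#include <string.h>

#define STATEID_OTHER_SIZE 12    /* NFS4_STATEID_OTHER_SIZE */

struct stateid {
    unsigned int seqid;              /* big-endian on the wire */
    char other[STATEID_OTHER_SIZE];  /* identifies the state */
};

/* Full comparison: same state *and* same revision of it. */
static bool stateid_match(const struct stateid *a, const struct stateid *b)
{
    return memcmp(a, b, sizeof(*a)) == 0;
}

/* Ignore the seqid: same state, however often it has changed. */
static bool stateid_match_other(const struct stateid *a,
                                const struct stateid *b)
{
    return memcmp(a->other, b->other, STATEID_OTHER_SIZE) == 0;
}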


@@ -43,7 +43,7 @@ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred,
fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit);
int nfs4_inode_return_delegation(struct inode *inode);
int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
void nfs_inode_return_delegation_noreclaim(struct inode *inode);
void nfs_inode_evict_delegation(struct inode *inode);
struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
void nfs_server_return_all_delegations(struct nfs_server *);
@@ -53,6 +53,7 @@ void nfs_expire_unreferenced_delegations(struct nfs_client *clp);
int nfs_client_return_marked_delegations(struct nfs_client *clp);
int nfs_delegations_present(struct nfs_client *clp);
void nfs_remove_bad_delegation(struct inode *inode, const nfs4_stateid *stateid);
void nfs_delegation_mark_returned(struct inode *inode, const nfs4_stateid *stateid);
void nfs_delegation_mark_reclaim(struct nfs_client *clp);
void nfs_delegation_reap_unclaimed(struct nfs_client *clp);


@@ -105,6 +105,7 @@ nfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
ret = rpc_ops->getattr(NFS_SB(sb), server_fh, fattr, label, NULL);
if (ret) {
dprintk("%s: getattr failed %d\n", __func__, ret);
trace_nfs_fh_to_dentry(sb, server_fh, fattr->fileid, ret);
dentry = ERR_PTR(ret);
goto out_free_label;
}


@@ -649,7 +649,7 @@ out:
out_swapfile:
printk(KERN_INFO "NFS: attempt to write to active swap file!\n");
return -EBUSY;
return -ETXTBSY;
}
EXPORT_SYMBOL_GPL(nfs_file_write);


@@ -504,15 +504,15 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
nfsi->read_cache_jiffies = fattr->time_start;
nfsi->attr_gencount = fattr->gencount;
if (fattr->valid & NFS_ATTR_FATTR_ATIME)
inode->i_atime = timespec_to_timespec64(fattr->atime);
inode->i_atime = fattr->atime;
else if (nfs_server_capable(inode, NFS_CAP_ATIME))
nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME);
if (fattr->valid & NFS_ATTR_FATTR_MTIME)
inode->i_mtime = timespec_to_timespec64(fattr->mtime);
inode->i_mtime = fattr->mtime;
else if (nfs_server_capable(inode, NFS_CAP_MTIME))
nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME);
if (fattr->valid & NFS_ATTR_FATTR_CTIME)
inode->i_ctime = timespec_to_timespec64(fattr->ctime);
inode->i_ctime = fattr->ctime;
else if (nfs_server_capable(inode, NFS_CAP_CTIME))
nfs_set_cache_invalid(inode, NFS_INO_INVALID_CTIME);
if (fattr->valid & NFS_ATTR_FATTR_CHANGE)
@@ -698,7 +698,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
if ((attr->ia_valid & ATTR_GID) != 0)
inode->i_gid = attr->ia_gid;
if (fattr->valid & NFS_ATTR_FATTR_CTIME)
inode->i_ctime = timespec_to_timespec64(fattr->ctime);
inode->i_ctime = fattr->ctime;
else
nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE
| NFS_INO_INVALID_CTIME);
@@ -709,14 +709,14 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_ATIME
| NFS_INO_INVALID_CTIME);
if (fattr->valid & NFS_ATTR_FATTR_ATIME)
inode->i_atime = timespec_to_timespec64(fattr->atime);
inode->i_atime = fattr->atime;
else if (attr->ia_valid & ATTR_ATIME_SET)
inode->i_atime = attr->ia_atime;
else
nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME);
if (fattr->valid & NFS_ATTR_FATTR_CTIME)
inode->i_ctime = timespec_to_timespec64(fattr->ctime);
inode->i_ctime = fattr->ctime;
else
nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE
| NFS_INO_INVALID_CTIME);
@@ -725,14 +725,14 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_MTIME
| NFS_INO_INVALID_CTIME);
if (fattr->valid & NFS_ATTR_FATTR_MTIME)
inode->i_mtime = timespec_to_timespec64(fattr->mtime);
inode->i_mtime = fattr->mtime;
else if (attr->ia_valid & ATTR_MTIME_SET)
inode->i_mtime = attr->ia_mtime;
else
nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME);
if (fattr->valid & NFS_ATTR_FATTR_CTIME)
inode->i_ctime = timespec_to_timespec64(fattr->ctime);
inode->i_ctime = fattr->ctime;
else
nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE
| NFS_INO_INVALID_CTIME);
@@ -1351,7 +1351,7 @@ static bool nfs_file_has_buffered_writers(struct nfs_inode *nfsi)
static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
{
struct timespec ts;
struct timespec64 ts;
if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE)
&& (fattr->valid & NFS_ATTR_FATTR_CHANGE)
@@ -1361,18 +1361,18 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA);
}
/* If we have atomic WCC data, we may update some attributes */
ts = timespec64_to_timespec(inode->i_ctime);
ts = inode->i_ctime;
if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME)
&& (fattr->valid & NFS_ATTR_FATTR_CTIME)
&& timespec_equal(&ts, &fattr->pre_ctime)) {
inode->i_ctime = timespec_to_timespec64(fattr->ctime);
&& timespec64_equal(&ts, &fattr->pre_ctime)) {
inode->i_ctime = fattr->ctime;
}
ts = timespec64_to_timespec(inode->i_mtime);
ts = inode->i_mtime;
if ((fattr->valid & NFS_ATTR_FATTR_PREMTIME)
&& (fattr->valid & NFS_ATTR_FATTR_MTIME)
&& timespec_equal(&ts, &fattr->pre_mtime)) {
inode->i_mtime = timespec_to_timespec64(fattr->mtime);
&& timespec64_equal(&ts, &fattr->pre_mtime)) {
inode->i_mtime = fattr->mtime;
if (S_ISDIR(inode->i_mode))
nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA);
}
@@ -1398,7 +1398,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
struct nfs_inode *nfsi = NFS_I(inode);
loff_t cur_size, new_isize;
unsigned long invalid = 0;
struct timespec ts;
struct timespec64 ts;
if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
return 0;
@@ -1425,12 +1425,12 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
invalid |= NFS_INO_INVALID_CHANGE
| NFS_INO_REVAL_PAGECACHE;
ts = timespec64_to_timespec(inode->i_mtime);
if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&ts, &fattr->mtime))
ts = inode->i_mtime;
if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec64_equal(&ts, &fattr->mtime))
invalid |= NFS_INO_INVALID_MTIME;
ts = timespec64_to_timespec(inode->i_ctime);
if ((fattr->valid & NFS_ATTR_FATTR_CTIME) && !timespec_equal(&ts, &fattr->ctime))
ts = inode->i_ctime;
if ((fattr->valid & NFS_ATTR_FATTR_CTIME) && !timespec64_equal(&ts, &fattr->ctime))
invalid |= NFS_INO_INVALID_CTIME;
if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
@@ -1460,8 +1460,8 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink)
invalid |= NFS_INO_INVALID_OTHER;
ts = timespec64_to_timespec(inode->i_atime);
if ((fattr->valid & NFS_ATTR_FATTR_ATIME) && !timespec_equal(&ts, &fattr->atime))
ts = inode->i_atime;
if ((fattr->valid & NFS_ATTR_FATTR_ATIME) && !timespec64_equal(&ts, &fattr->atime))
invalid |= NFS_INO_INVALID_ATIME;
if (invalid != 0)
@@ -1733,12 +1733,12 @@ int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fa
}
if ((fattr->valid & NFS_ATTR_FATTR_CTIME) != 0 &&
(fattr->valid & NFS_ATTR_FATTR_PRECTIME) == 0) {
fattr->pre_ctime = timespec64_to_timespec(inode->i_ctime);
fattr->pre_ctime = inode->i_ctime;
fattr->valid |= NFS_ATTR_FATTR_PRECTIME;
}
if ((fattr->valid & NFS_ATTR_FATTR_MTIME) != 0 &&
(fattr->valid & NFS_ATTR_FATTR_PREMTIME) == 0) {
fattr->pre_mtime = timespec64_to_timespec(inode->i_mtime);
fattr->pre_mtime = inode->i_mtime;
fattr->valid |= NFS_ATTR_FATTR_PREMTIME;
}
if ((fattr->valid & NFS_ATTR_FATTR_SIZE) != 0 &&
@@ -1899,7 +1899,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
}
if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
inode->i_mtime = timespec_to_timespec64(fattr->mtime);
inode->i_mtime = fattr->mtime;
} else if (server->caps & NFS_CAP_MTIME) {
nfsi->cache_validity |= save_cache_validity &
(NFS_INO_INVALID_MTIME
@@ -1908,7 +1908,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
}
if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
inode->i_ctime = timespec_to_timespec64(fattr->ctime);
inode->i_ctime = fattr->ctime;
} else if (server->caps & NFS_CAP_CTIME) {
nfsi->cache_validity |= save_cache_validity &
(NFS_INO_INVALID_CTIME
@@ -1946,7 +1946,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
if (fattr->valid & NFS_ATTR_FATTR_ATIME)
inode->i_atime = timespec_to_timespec64(fattr->atime);
inode->i_atime = fattr->atime;
else if (server->caps & NFS_CAP_ATIME) {
nfsi->cache_validity |= save_cache_validity &
(NFS_INO_INVALID_ATIME


@@ -713,7 +713,7 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len)
* 1024*1024*1024.
*/
static inline
u64 nfs_timespec_to_change_attr(const struct timespec *ts)
u64 nfs_timespec_to_change_attr(const struct timespec64 *ts)
{
return ((u64)ts->tv_sec << 30) + ts->tv_nsec;
}
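
For exports without a native change attribute, the helper above
synthesizes one from ctime: seconds shifted left by 30 bits (the
1024*1024*1024 in the comment) with the nanoseconds added in. Because
tv_nsec < 10^9 < 2^30, the nanoseconds can never carry into the
seconds, so a later timestamp always yields a strictly larger change
attribute; with timespec64 the full 64-bit seconds now feed the shift.
A self-checking sketch:

#include <assert.h>
#include <stdint.h>

static uint64_t change_attr(int64_t sec, long nsec)
{
    return ((uint64_t)sec << 30) + nsec;   /* nsec < 10^9 < 2^30 */
}

int main(void)
{
    /* A one-second-later ctime beats any nanosecond count. */
    assert(change_attr(100, 999999999) < change_attr(101, 0));
    return 0;
}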


@@ -157,6 +157,9 @@ struct vfsmount *nfs_d_automount(struct path *path)
if (IS_ERR(mnt))
goto out;
if (nfs_mountpoint_expiry_timeout < 0)
goto out;
mntget(mnt); /* prevent immediate expiration */
mnt_set_expiry(mnt, &nfs_automount_list);
schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout);


@@ -209,9 +209,9 @@ static int decode_fhandle(struct xdr_stream *xdr, struct nfs_fh *fh)
* unsigned int useconds;
* };
*/
static __be32 *xdr_encode_time(__be32 *p, const struct timespec *timep)
static __be32 *xdr_encode_time(__be32 *p, const struct timespec64 *timep)
{
*p++ = cpu_to_be32(timep->tv_sec);
*p++ = cpu_to_be32((u32)timep->tv_sec);
if (timep->tv_nsec != 0)
*p++ = cpu_to_be32(timep->tv_nsec / NSEC_PER_USEC);
else
@@ -227,14 +227,14 @@ static __be32 *xdr_encode_time(__be32 *p, const struct timespec *timep)
* Illustrated" by Brent Callaghan, Addison-Wesley, ISBN 0-201-32750-5.
*/
static __be32 *xdr_encode_current_server_time(__be32 *p,
const struct timespec *timep)
const struct timespec64 *timep)
{
*p++ = cpu_to_be32(timep->tv_sec);
*p++ = cpu_to_be32(1000000);
return p;
}
static __be32 *xdr_decode_time(__be32 *p, struct timespec *timep)
static __be32 *xdr_decode_time(__be32 *p, struct timespec64 *timep)
{
timep->tv_sec = be32_to_cpup(p++);
timep->tv_nsec = be32_to_cpup(p++) * NSEC_PER_USEC;
@@ -339,7 +339,6 @@ static __be32 *xdr_time_not_set(__be32 *p)
static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr,
struct user_namespace *userns)
{
struct timespec ts;
__be32 *p;
p = xdr_reserve_space(xdr, NFS_sattr_sz << 2);
@@ -362,19 +361,15 @@ static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr,
*p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
if (attr->ia_valid & ATTR_ATIME_SET) {
ts = timespec64_to_timespec(attr->ia_atime);
p = xdr_encode_time(p, &ts);
p = xdr_encode_time(p, &attr->ia_atime);
} else if (attr->ia_valid & ATTR_ATIME) {
ts = timespec64_to_timespec(attr->ia_atime);
p = xdr_encode_current_server_time(p, &ts);
p = xdr_encode_current_server_time(p, &attr->ia_atime);
} else
p = xdr_time_not_set(p);
if (attr->ia_valid & ATTR_MTIME_SET) {
ts = timespec64_to_timespec(attr->ia_atime);
xdr_encode_time(p, &ts);
xdr_encode_time(p, &attr->ia_mtime);
} else if (attr->ia_valid & ATTR_MTIME) {
ts = timespec64_to_timespec(attr->ia_mtime);
xdr_encode_current_server_time(p, &ts);
xdr_encode_current_server_time(p, &attr->ia_mtime);
} else
xdr_time_not_set(p);
}
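
Note the (u32) cast in xdr_encode_time() here (and in the nfs3xdr.c
hunk below): the NFSv2 and NFSv3 wire formats carry seconds as
unsigned 32-bit values, so switching the client to timespec64 cleans
up the in-kernel representation without being able to widen the
protocol itself. Seconds outside the unsigned 32-bit range simply
truncate at the XDR layer:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    int64_t sec = 4294967296LL + 5;  /* fine in timespec64 */
    uint32_t wire = (uint32_t)sec;   /* all the v2/v3 XDR can carry */

    printf("in-kernel %lld -> on-wire %u\n", (long long)sec, wire);
    return 0;                        /* prints 4294967301 -> 5 */
}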


@@ -106,7 +106,10 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,
cl_init.nconnect = mds_clp->cl_nconnect;
if (mds_srv->flags & NFS_MOUNT_NORESVPORT)
set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
__set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
__set_bit(NFS_CS_NOPING, &cl_init.init_flags);
__set_bit(NFS_CS_DS, &cl_init.init_flags);
/* Use the MDS nfs_client cl_ipaddr. */
nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans);


@@ -456,14 +456,14 @@ static void zero_nfs_fh3(struct nfs_fh *fh)
* uint32 nseconds;
* };
*/
static __be32 *xdr_encode_nfstime3(__be32 *p, const struct timespec *timep)
static __be32 *xdr_encode_nfstime3(__be32 *p, const struct timespec64 *timep)
{
*p++ = cpu_to_be32(timep->tv_sec);
*p++ = cpu_to_be32((u32)timep->tv_sec);
*p++ = cpu_to_be32(timep->tv_nsec);
return p;
}
static __be32 *xdr_decode_nfstime3(__be32 *p, struct timespec *timep)
static __be32 *xdr_decode_nfstime3(__be32 *p, struct timespec64 *timep)
{
timep->tv_sec = be32_to_cpup(p++);
timep->tv_nsec = be32_to_cpup(p++);
@@ -533,7 +533,6 @@ static __be32 *xdr_decode_nfstime3(__be32 *p, struct timespec *timep)
static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr,
struct user_namespace *userns)
{
struct timespec ts;
u32 nbytes;
__be32 *p;
@@ -583,10 +582,8 @@ static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr,
*p++ = xdr_zero;
if (attr->ia_valid & ATTR_ATIME_SET) {
struct timespec ts;
*p++ = xdr_two;
ts = timespec64_to_timespec(attr->ia_atime);
p = xdr_encode_nfstime3(p, &ts);
p = xdr_encode_nfstime3(p, &attr->ia_atime);
} else if (attr->ia_valid & ATTR_ATIME) {
*p++ = xdr_one;
} else
@@ -594,8 +591,7 @@ static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr,
if (attr->ia_valid & ATTR_MTIME_SET) {
*p++ = xdr_two;
ts = timespec64_to_timespec(attr->ia_mtime);
xdr_encode_nfstime3(p, &ts);
xdr_encode_nfstime3(p, &attr->ia_mtime);
} else if (attr->ia_valid & ATTR_MTIME) {
*p = xdr_one;
} else


@@ -13,8 +13,10 @@
#define PNFS_LAYOUTSTATS_MAXDEV (4)
/* nfs4.2proc.c */
#ifdef CONFIG_NFS_V4_2
int nfs42_proc_allocate(struct file *, loff_t, loff_t);
ssize_t nfs42_proc_copy(struct file *, loff_t, struct file *, loff_t, size_t);
ssize_t nfs42_proc_copy(struct file *, loff_t, struct file *, loff_t, size_t,
struct nl4_server *, nfs4_stateid *, bool);
int nfs42_proc_deallocate(struct file *, loff_t, loff_t);
loff_t nfs42_proc_llseek(struct file *, loff_t, int);
int nfs42_proc_layoutstats_generic(struct nfs_server *,
@@ -23,5 +25,16 @@ int nfs42_proc_clone(struct file *, struct file *, loff_t, loff_t, loff_t);
int nfs42_proc_layouterror(struct pnfs_layout_segment *lseg,
const struct nfs42_layout_error *errors,
size_t n);
int nfs42_proc_copy_notify(struct file *, struct file *,
struct nfs42_copy_notify_res *);
static inline bool nfs42_files_from_same_server(struct file *in,
struct file *out)
{
struct nfs_client *c_in = (NFS_SERVER(file_inode(in)))->nfs_client;
struct nfs_client *c_out = (NFS_SERVER(file_inode(out)))->nfs_client;
return nfs4_check_serverowner_major_id(c_in->cl_serverowner,
c_out->cl_serverowner);
}
#endif /* CONFIG_NFS_V4_2 */
#endif /* __LINUX_FS_NFS_NFS4_2_H */


@@ -3,6 +3,7 @@
* Copyright (c) 2014 Anna Schumaker <Anna.Schumaker@Netapp.com>
*/
#include <linux/fs.h>
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/sched.h>
#include <linux/nfs.h>
#include <linux/nfs3.h>
@@ -15,10 +16,30 @@
#include "pnfs.h"
#include "nfs4session.h"
#include "internal.h"
#include "delegation.h"
#define NFSDBG_FACILITY NFSDBG_PROC
static int nfs42_do_offload_cancel_async(struct file *dst, nfs4_stateid *std);
static void nfs42_set_netaddr(struct file *filep, struct nfs42_netaddr *naddr)
{
struct nfs_client *clp = (NFS_SERVER(file_inode(filep)))->nfs_client;
unsigned short port = 2049;
rcu_read_lock();
naddr->netid_len = scnprintf(naddr->netid,
sizeof(naddr->netid), "%s",
rpc_peeraddr2str(clp->cl_rpcclient,
RPC_DISPLAY_NETID));
naddr->addr_len = scnprintf(naddr->addr,
sizeof(naddr->addr),
"%s.%u.%u",
rpc_peeraddr2str(clp->cl_rpcclient,
RPC_DISPLAY_ADDR),
port >> 8, port & 255);
rcu_read_unlock();
}
static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
struct nfs_lock_context *lock, loff_t offset, loff_t len)
{
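
nfs42_set_netaddr() above builds the RFC 5665 "universal address" that
COPY_NOTIFY hands to the source server: the netid (e.g. "tcp"), then
the IP address with the port appended as two decimal octets. With the
NFS port fixed at 2049, that suffix is always "8.1", since
2049 = 8 * 256 + 1. For illustration (192.0.2.10 is a documentation
address):

#include <stdio.h>

int main(void)
{
    unsigned short port = 2049;

    /* prints "192.0.2.10.8.1" */
    printf("192.0.2.10.%u.%u\n", port >> 8, port & 255);
    return 0;
}
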
@@ -28,7 +49,7 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
.falloc_fh = NFS_FH(inode),
.falloc_offset = offset,
.falloc_length = len,
.falloc_bitmask = server->cache_consistency_bitmask,
.falloc_bitmask = nfs4_fattr_bitmap,
};
struct nfs42_falloc_res res = {
.falloc_server = server,
@@ -132,22 +153,26 @@ out_unlock:
}
static int handle_async_copy(struct nfs42_copy_res *res,
struct nfs_server *server,
struct nfs_server *dst_server,
struct nfs_server *src_server,
struct file *src,
struct file *dst,
nfs4_stateid *src_stateid)
nfs4_stateid *src_stateid,
bool *restart)
{
struct nfs4_copy_state *copy, *tmp_copy;
int status = NFS4_OK;
bool found_pending = false;
struct nfs_open_context *ctx = nfs_file_open_context(dst);
struct nfs_open_context *dst_ctx = nfs_file_open_context(dst);
struct nfs_open_context *src_ctx = nfs_file_open_context(src);
copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS);
if (!copy)
return -ENOMEM;
spin_lock(&server->nfs_client->cl_lock);
list_for_each_entry(tmp_copy, &server->nfs_client->pending_cb_stateids,
spin_lock(&dst_server->nfs_client->cl_lock);
list_for_each_entry(tmp_copy,
&dst_server->nfs_client->pending_cb_stateids,
copies) {
if (memcmp(&res->write_res.stateid, &tmp_copy->stateid,
NFS4_STATEID_SIZE))
@@ -157,7 +182,7 @@ static int handle_async_copy(struct nfs42_copy_res *res,
break;
}
if (found_pending) {
spin_unlock(&server->nfs_client->cl_lock);
spin_unlock(&dst_server->nfs_client->cl_lock);
kfree(copy);
copy = tmp_copy;
goto out;
@@ -165,19 +190,32 @@ static int handle_async_copy(struct nfs42_copy_res *res,
memcpy(&copy->stateid, &res->write_res.stateid, NFS4_STATEID_SIZE);
init_completion(&copy->completion);
copy->parent_state = ctx->state;
copy->parent_dst_state = dst_ctx->state;
copy->parent_src_state = src_ctx->state;
list_add_tail(&copy->copies, &server->ss_copies);
spin_unlock(&server->nfs_client->cl_lock);
list_add_tail(&copy->copies, &dst_server->ss_copies);
spin_unlock(&dst_server->nfs_client->cl_lock);
if (dst_server != src_server) {
spin_lock(&src_server->nfs_client->cl_lock);
list_add_tail(&copy->src_copies, &src_server->ss_copies);
spin_unlock(&src_server->nfs_client->cl_lock);
}
status = wait_for_completion_interruptible(&copy->completion);
spin_lock(&server->nfs_client->cl_lock);
spin_lock(&dst_server->nfs_client->cl_lock);
list_del_init(&copy->copies);
spin_unlock(&server->nfs_client->cl_lock);
spin_unlock(&dst_server->nfs_client->cl_lock);
if (dst_server != src_server) {
spin_lock(&src_server->nfs_client->cl_lock);
list_del_init(&copy->src_copies);
spin_unlock(&src_server->nfs_client->cl_lock);
}
if (status == -ERESTARTSYS) {
goto out_cancel;
} else if (copy->flags) {
} else if (copy->flags || copy->error == NFS4ERR_PARTNER_NO_AUTH) {
status = -EAGAIN;
*restart = true;
goto out_cancel;
}
out:
@@ -185,12 +223,14 @@ out:
memcpy(&res->write_res.verifier, &copy->verf, sizeof(copy->verf));
status = -copy->error;
out_free:
kfree(copy);
return status;
out_cancel:
nfs42_do_offload_cancel_async(dst, &copy->stateid);
kfree(copy);
return status;
if (!nfs42_files_from_same_server(src, dst))
nfs42_do_offload_cancel_async(src, src_stateid);
goto out_free;
}
static int process_copy_commit(struct file *dst, loff_t pos_dst,
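
handle_async_copy() above is the waiting half of the asynchronous COPY
protocol: the task parks on a completion that the server's CB_OFFLOAD
callback later fires, and a signal is turned into an OFFLOAD_CANCEL to
the server(s). Reduced to the bare completion idiom (illustrative
only, not the real struct nfs4_copy_state):

#include <linux/completion.h>
#include <linux/errno.h>

struct pending_copy {
    struct completion done;
    int error;
};

/* Submitting side: COPY came back "in progress", wait for CB_OFFLOAD. */
static int wait_for_offload(struct pending_copy *copy)
{
    init_completion(&copy->done);
    /* ... issue the COPY, stash the copy stateid ... */
    if (wait_for_completion_interruptible(&copy->done))
        return -ERESTARTSYS;   /* caller sends OFFLOAD_CANCEL */
    return copy->error;
}

/* Callback side: CB_OFFLOAD delivers the final status. */
static void offload_done(struct pending_copy *copy, int error)
{
    copy->error = error;
    complete(&copy->done);
}
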
@@ -222,7 +262,10 @@ static ssize_t _nfs42_proc_copy(struct file *src,
struct file *dst,
struct nfs_lock_context *dst_lock,
struct nfs42_copy_args *args,
struct nfs42_copy_res *res)
struct nfs42_copy_res *res,
struct nl4_server *nss,
nfs4_stateid *cnr_stateid,
bool *restart)
{
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COPY],
@@ -230,17 +273,23 @@ static ssize_t _nfs42_proc_copy(struct file *src,
.rpc_resp = res,
};
struct inode *dst_inode = file_inode(dst);
struct nfs_server *server = NFS_SERVER(dst_inode);
struct inode *src_inode = file_inode(src);
struct nfs_server *dst_server = NFS_SERVER(dst_inode);
struct nfs_server *src_server = NFS_SERVER(src_inode);
loff_t pos_src = args->src_pos;
loff_t pos_dst = args->dst_pos;
size_t count = args->count;
ssize_t status;
status = nfs4_set_rw_stateid(&args->src_stateid, src_lock->open_context,
src_lock, FMODE_READ);
if (status)
return status;
if (nss) {
args->cp_src = nss;
nfs4_stateid_copy(&args->src_stateid, cnr_stateid);
} else {
status = nfs4_set_rw_stateid(&args->src_stateid,
src_lock->open_context, src_lock, FMODE_READ);
if (status)
return status;
}
status = nfs_filemap_write_and_wait_range(file_inode(src)->i_mapping,
pos_src, pos_src + (loff_t)count - 1);
if (status)
@@ -262,13 +311,15 @@ static ssize_t _nfs42_proc_copy(struct file *src,
if (!res->commit_res.verf)
return -ENOMEM;
}
set_bit(NFS_CLNT_SRC_SSC_COPY_STATE,
&src_lock->open_context->state->flags);
set_bit(NFS_CLNT_DST_SSC_COPY_STATE,
&dst_lock->open_context->state->flags);
status = nfs4_call_sync(server->client, server, &msg,
status = nfs4_call_sync(dst_server->client, dst_server, &msg,
&args->seq_args, &res->seq_res, 0);
if (status == -ENOTSUPP)
server->caps &= ~NFS_CAP_COPY;
dst_server->caps &= ~NFS_CAP_COPY;
if (status)
goto out;
@@ -280,8 +331,8 @@ static ssize_t _nfs42_proc_copy(struct file *src,
}
if (!res->synchronous) {
status = handle_async_copy(res, server, src, dst,
&args->src_stateid);
status = handle_async_copy(res, dst_server, src_server, src,
dst, &args->src_stateid, restart);
if (status)
return status;
}
@@ -304,8 +355,9 @@ out:
}
ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
struct file *dst, loff_t pos_dst,
size_t count)
struct file *dst, loff_t pos_dst, size_t count,
struct nl4_server *nss,
nfs4_stateid *cnr_stateid, bool sync)
{
struct nfs_server *server = NFS_SERVER(file_inode(dst));
struct nfs_lock_context *src_lock;
@@ -316,7 +368,7 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
.dst_fh = NFS_FH(file_inode(dst)),
.dst_pos = pos_dst,
.count = count,
.sync = false,
.sync = sync,
};
struct nfs42_copy_res res;
struct nfs4_exception src_exception = {
@@ -328,6 +380,7 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
.stateid = &args.dst_stateid,
};
ssize_t err, err2;
bool restart = false;
src_lock = nfs_get_lock_context(nfs_file_open_context(src));
if (IS_ERR(src_lock))
@@ -347,21 +400,33 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
inode_lock(file_inode(dst));
err = _nfs42_proc_copy(src, src_lock,
dst, dst_lock,
&args, &res);
&args, &res,
nss, cnr_stateid, &restart);
inode_unlock(file_inode(dst));
if (err >= 0)
break;
if (err == -ENOTSUPP) {
if (err == -ENOTSUPP &&
nfs42_files_from_same_server(src, dst)) {
err = -EOPNOTSUPP;
break;
} else if (err == -EAGAIN) {
dst_exception.retry = 1;
continue;
if (!restart) {
dst_exception.retry = 1;
continue;
}
break;
} else if (err == -NFS4ERR_OFFLOAD_NO_REQS && !args.sync) {
args.sync = true;
dst_exception.retry = 1;
continue;
} else if ((err == -ESTALE ||
err == -NFS4ERR_OFFLOAD_DENIED ||
err == -ENOTSUPP) &&
!nfs42_files_from_same_server(src, dst)) {
nfs42_do_offload_cancel_async(src, &args.src_stateid);
err = -EOPNOTSUPP;
break;
}
err2 = nfs4_handle_exception(server, err, &src_exception);
@@ -459,6 +524,76 @@ static int nfs42_do_offload_cancel_async(struct file *dst,
return status;
}
static int _nfs42_proc_copy_notify(struct file *src, struct file *dst,
struct nfs42_copy_notify_args *args,
struct nfs42_copy_notify_res *res)
{
struct nfs_server *src_server = NFS_SERVER(file_inode(src));
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COPY_NOTIFY],
.rpc_argp = args,
.rpc_resp = res,
};
int status;
struct nfs_open_context *ctx;
struct nfs_lock_context *l_ctx;
ctx = get_nfs_open_context(nfs_file_open_context(src));
l_ctx = nfs_get_lock_context(ctx);
if (IS_ERR(l_ctx))
return PTR_ERR(l_ctx);
status = nfs4_set_rw_stateid(&args->cna_src_stateid, ctx, l_ctx,
FMODE_READ);
nfs_put_lock_context(l_ctx);
if (status)
return status;
status = nfs4_call_sync(src_server->client, src_server, &msg,
&args->cna_seq_args, &res->cnr_seq_res, 0);
if (status == -ENOTSUPP)
src_server->caps &= ~NFS_CAP_COPY_NOTIFY;
put_nfs_open_context(nfs_file_open_context(src));
return status;
}
int nfs42_proc_copy_notify(struct file *src, struct file *dst,
struct nfs42_copy_notify_res *res)
{
struct nfs_server *src_server = NFS_SERVER(file_inode(src));
struct nfs42_copy_notify_args *args;
struct nfs4_exception exception = {
.inode = file_inode(src),
};
int status;
if (!(src_server->caps & NFS_CAP_COPY_NOTIFY))
return -EOPNOTSUPP;
args = kzalloc(sizeof(struct nfs42_copy_notify_args), GFP_NOFS);
if (args == NULL)
return -ENOMEM;
args->cna_src_fh = NFS_FH(file_inode(src)),
args->cna_dst.nl4_type = NL4_NETADDR;
nfs42_set_netaddr(dst, &args->cna_dst.u.nl4_addr);
exception.stateid = &args->cna_src_stateid;
do {
status = _nfs42_proc_copy_notify(src, dst, args, res);
if (status == -ENOTSUPP) {
status = -EOPNOTSUPP;
goto out;
}
status = nfs4_handle_exception(src_server, status, &exception);
} while (exception.retry);
out:
kfree(args);
return status;
}
static loff_t _nfs42_proc_llseek(struct file *filep,
struct nfs_lock_context *lock, loff_t offset, int whence)
{


@@ -21,7 +21,10 @@
#define encode_copy_maxsz (op_encode_hdr_maxsz + \
XDR_QUADLEN(NFS4_STATEID_SIZE) + \
XDR_QUADLEN(NFS4_STATEID_SIZE) + \
2 + 2 + 2 + 1 + 1 + 1)
2 + 2 + 2 + 1 + 1 + 1 +\
1 + /* One cnr_source_server */\
1 + /* nl4_type */ \
1 + XDR_QUADLEN(NFS4_OPAQUE_LIMIT))
#define decode_copy_maxsz (op_decode_hdr_maxsz + \
NFS42_WRITE_RES_SIZE + \
1 /* cr_consecutive */ + \
@@ -29,6 +32,16 @@
#define encode_offload_cancel_maxsz (op_encode_hdr_maxsz + \
XDR_QUADLEN(NFS4_STATEID_SIZE))
#define decode_offload_cancel_maxsz (op_decode_hdr_maxsz)
#define encode_copy_notify_maxsz (op_encode_hdr_maxsz + \
XDR_QUADLEN(NFS4_STATEID_SIZE) + \
1 + /* nl4_type */ \
1 + XDR_QUADLEN(NFS4_OPAQUE_LIMIT))
#define decode_copy_notify_maxsz (op_decode_hdr_maxsz + \
3 + /* cnr_lease_time */\
XDR_QUADLEN(NFS4_STATEID_SIZE) + \
1 + /* Support 1 cnr_source_server */\
1 + /* nl4_type */ \
1 + XDR_QUADLEN(NFS4_OPAQUE_LIMIT))
#define encode_deallocate_maxsz (op_encode_hdr_maxsz + \
encode_fallocate_maxsz)
#define decode_deallocate_maxsz (op_decode_hdr_maxsz)
@@ -99,6 +112,12 @@
decode_sequence_maxsz + \
decode_putfh_maxsz + \
decode_offload_cancel_maxsz)
#define NFS4_enc_copy_notify_sz (compound_encode_hdr_maxsz + \
encode_putfh_maxsz + \
encode_copy_notify_maxsz)
#define NFS4_dec_copy_notify_sz (compound_decode_hdr_maxsz + \
decode_putfh_maxsz + \
decode_copy_notify_maxsz)
#define NFS4_enc_deallocate_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
@@ -166,6 +185,26 @@ static void encode_allocate(struct xdr_stream *xdr,
encode_fallocate(xdr, args);
}
static void encode_nl4_server(struct xdr_stream *xdr,
const struct nl4_server *ns)
{
encode_uint32(xdr, ns->nl4_type);
switch (ns->nl4_type) {
case NL4_NAME:
case NL4_URL:
encode_string(xdr, ns->u.nl4_str_sz, ns->u.nl4_str);
break;
case NL4_NETADDR:
encode_string(xdr, ns->u.nl4_addr.netid_len,
ns->u.nl4_addr.netid);
encode_string(xdr, ns->u.nl4_addr.addr_len,
ns->u.nl4_addr.addr);
break;
default:
WARN_ON_ONCE(1);
}
}
static void encode_copy(struct xdr_stream *xdr,
const struct nfs42_copy_args *args,
struct compound_hdr *hdr)
@@ -180,7 +219,12 @@ static void encode_copy(struct xdr_stream *xdr,
encode_uint32(xdr, 1); /* consecutive = true */
encode_uint32(xdr, args->sync);
encode_uint32(xdr, 0); /* src server list */
if (args->cp_src == NULL) { /* intra-ssc */
encode_uint32(xdr, 0); /* no src server list */
return;
}
encode_uint32(xdr, 1); /* supporting 1 server */
encode_nl4_server(xdr, args->cp_src);
}
static void encode_offload_cancel(struct xdr_stream *xdr,
@@ -191,6 +235,15 @@ static void encode_offload_cancel(struct xdr_stream *xdr,
encode_nfs4_stateid(xdr, &args->osa_stateid);
}
static void encode_copy_notify(struct xdr_stream *xdr,
const struct nfs42_copy_notify_args *args,
struct compound_hdr *hdr)
{
encode_op_hdr(xdr, OP_COPY_NOTIFY, decode_copy_notify_maxsz, hdr);
encode_nfs4_stateid(xdr, &args->cna_src_stateid);
encode_nl4_server(xdr, &args->cna_dst);
}
static void encode_deallocate(struct xdr_stream *xdr,
const struct nfs42_falloc_args *args,
struct compound_hdr *hdr)
@@ -354,6 +407,25 @@ static void nfs4_xdr_enc_offload_cancel(struct rpc_rqst *req,
encode_nops(&hdr);
}
/*
* Encode COPY_NOTIFY request
*/
static void nfs4_xdr_enc_copy_notify(struct rpc_rqst *req,
struct xdr_stream *xdr,
const void *data)
{
const struct nfs42_copy_notify_args *args = data;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->cna_seq_args),
};
encode_compound_hdr(xdr, req, &hdr);
encode_sequence(xdr, &args->cna_seq_args, &hdr);
encode_putfh(xdr, args->cna_src_fh, &hdr);
encode_copy_notify(xdr, args, &hdr);
encode_nops(&hdr);
}
/*
* Encode DEALLOCATE request
*/
@@ -490,6 +562,58 @@ static int decode_write_response(struct xdr_stream *xdr,
return decode_verifier(xdr, &res->verifier.verifier);
}
static int decode_nl4_server(struct xdr_stream *xdr, struct nl4_server *ns)
{
struct nfs42_netaddr *naddr;
uint32_t dummy;
char *dummy_str;
__be32 *p;
int status;
/* nl_type */
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
return -EIO;
ns->nl4_type = be32_to_cpup(p);
switch (ns->nl4_type) {
case NL4_NAME:
case NL4_URL:
status = decode_opaque_inline(xdr, &dummy, &dummy_str);
if (unlikely(status))
return status;
if (unlikely(dummy > NFS4_OPAQUE_LIMIT))
return -EIO;
memcpy(&ns->u.nl4_str, dummy_str, dummy);
ns->u.nl4_str_sz = dummy;
break;
case NL4_NETADDR:
naddr = &ns->u.nl4_addr;
/* netid string */
status = decode_opaque_inline(xdr, &dummy, &dummy_str);
if (unlikely(status))
return status;
if (unlikely(dummy > RPCBIND_MAXNETIDLEN))
return -EIO;
naddr->netid_len = dummy;
memcpy(naddr->netid, dummy_str, naddr->netid_len);
/* uaddr string */
status = decode_opaque_inline(xdr, &dummy, &dummy_str);
if (unlikely(status))
return status;
if (unlikely(dummy > RPCBIND_MAXUADDRLEN))
return -EIO;
naddr->addr_len = dummy;
memcpy(naddr->addr, dummy_str, naddr->addr_len);
break;
default:
WARN_ON_ONCE(1);
return -EIO;
}
return 0;
}
static int decode_copy_requirements(struct xdr_stream *xdr,
struct nfs42_copy_res *res) {
__be32 *p;
@@ -529,6 +653,42 @@ static int decode_offload_cancel(struct xdr_stream *xdr,
return decode_op_hdr(xdr, OP_OFFLOAD_CANCEL);
}
static int decode_copy_notify(struct xdr_stream *xdr,
struct nfs42_copy_notify_res *res)
{
__be32 *p;
int status, count;
status = decode_op_hdr(xdr, OP_COPY_NOTIFY);
if (status)
return status;
/* cnr_lease_time */
p = xdr_inline_decode(xdr, 12);
if (unlikely(!p))
return -EIO;
p = xdr_decode_hyper(p, &res->cnr_lease_time.seconds);
res->cnr_lease_time.nseconds = be32_to_cpup(p);
status = decode_opaque_fixed(xdr, &res->cnr_stateid, NFS4_STATEID_SIZE);
if (unlikely(status))
return -EIO;
/* number of source addresses */
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
return -EIO;
count = be32_to_cpup(p);
if (count > 1)
pr_warn("NFS: %s: nsvr %d > Supported. Use first servers\n",
__func__, count);
status = decode_nl4_server(xdr, &res->cnr_src);
if (unlikely(status))
return -EIO;
return 0;
}
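
The fixed-size reads in decode_copy_notify() mirror the COPY_NOTIFY
reply layout from RFC 7862, which is why the first xdr_inline_decode()
asks for exactly 12 bytes:

    cnr_lease_time       nfstime4     8-byte seconds + 4-byte nseconds
    cnr_stateid          stateid4     4-byte seqid + 12-byte "other"
    cnr_source_server<>  netloc4 array, preceded by a 4-byte count;
                         only the first entry is used here
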
static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res)
{
return decode_op_hdr(xdr, OP_DEALLOCATE);
@@ -656,6 +816,32 @@ out:
return status;
}
/*
* Decode COPY_NOTIFY response
*/
static int nfs4_xdr_dec_copy_notify(struct rpc_rqst *rqstp,
struct xdr_stream *xdr,
void *data)
{
struct nfs42_copy_notify_res *res = data;
struct compound_hdr hdr;
int status;
status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
status = decode_sequence(xdr, &res->cnr_seq_res, rqstp);
if (status)
goto out;
status = decode_putfh(xdr);
if (status)
goto out;
status = decode_copy_notify(xdr, res);
out:
return status;
}
/*
* Decode DEALLOCATE request
*/


@@ -166,9 +166,9 @@ enum {
NFS_STATE_RECOVERY_FAILED, /* OPEN stateid state recovery failed */
NFS_STATE_MAY_NOTIFY_LOCK, /* server may CB_NOTIFY_LOCK */
NFS_STATE_CHANGE_WAIT, /* A state changing operation is outstanding */
#ifdef CONFIG_NFS_V4_2
NFS_CLNT_DST_SSC_COPY_STATE, /* dst server open state on client*/
#endif /* CONFIG_NFS_V4_2 */
NFS_CLNT_SRC_SSC_COPY_STATE, /* src server open state on client*/
NFS_SRV_SSC_COPY_STATE, /* ssc state on the dst server */
};
struct nfs4_state {
@@ -311,6 +311,13 @@ extern int nfs4_set_rw_stateid(nfs4_stateid *stateid,
const struct nfs_open_context *ctx,
const struct nfs_lock_context *l_ctx,
fmode_t fmode);
extern int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fattr *fattr, struct nfs4_label *label,
struct inode *inode);
extern int update_open_stateid(struct nfs4_state *state,
const nfs4_stateid *open_stateid,
const nfs4_stateid *deleg_stateid,
fmode_t fmode);
extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
struct nfs_fsinfo *fsinfo);
@@ -445,6 +452,8 @@ extern void nfs4_set_lease_period(struct nfs_client *clp,
/* nfs4state.c */
extern const nfs4_stateid current_stateid;
const struct cred *nfs4_get_clid_cred(struct nfs_client *clp);
const struct cred *nfs4_get_machine_cred(struct nfs_client *clp);
const struct cred *nfs4_get_renew_cred(struct nfs_client *clp);
@@ -457,6 +466,8 @@ int nfs41_discover_server_trunking(struct nfs_client *clp,
struct nfs_client **, const struct cred *);
extern void nfs4_schedule_session_recovery(struct nfs4_session *, int);
extern void nfs41_notify_server(struct nfs_client *);
bool nfs4_check_serverowner_major_id(struct nfs41_server_owner *o1,
struct nfs41_server_owner *o2);
#else
static inline void nfs4_schedule_session_recovery(struct nfs4_session *session, int err)
{
@@ -572,6 +583,12 @@ static inline bool nfs4_stateid_is_newer(const nfs4_stateid *s1, const nfs4_stat
return (s32)(be32_to_cpu(s1->seqid) - be32_to_cpu(s2->seqid)) > 0;
}
static inline bool nfs4_stateid_match_or_older(const nfs4_stateid *dst, const nfs4_stateid *src)
{
return nfs4_stateid_match_other(dst, src) &&
!(src->seqid && nfs4_stateid_is_newer(dst, src));
}
static inline void nfs4_stateid_seqid_inc(nfs4_stateid *s1)
{
u32 seqid = be32_to_cpu(s1->seqid);
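
The seqid comparisons that nfs4_stateid_is_newer() and the new
nfs4_stateid_match_or_older() build on use serial-number arithmetic:
the unsigned subtraction wraps modulo 2^32 and the result is
reinterpreted as signed, so the ordering survives seqid wraparound. A
self-checking sketch of the same expression:

#include <assert.h>
#include <stdint.h>

/* Same test as nfs4_stateid_is_newer(), on host-order values. */
static int seqid_is_newer(uint32_t s1, uint32_t s2)
{
    return (int32_t)(s1 - s2) > 0;
}

int main(void)
{
    assert(seqid_is_newer(2, 1));
    assert(!seqid_is_newer(1, 2));
    /* Wraparound: seqid 1 is still "newer" than 0xffffffff. */
    assert(seqid_is_newer(1, 0xffffffffu));
    return 0;
}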


@@ -629,7 +629,7 @@ out:
/*
* Returns true if the server major ids match
*/
static bool
bool
nfs4_check_serverowner_major_id(struct nfs41_server_owner *o1,
struct nfs41_server_owner *o2)
{
@@ -879,14 +879,17 @@ static int nfs4_set_client(struct nfs_server *server,
};
struct nfs_client *clp;
if (minorversion > 0 && proto == XPRT_TRANSPORT_TCP)
if (minorversion == 0)
__set_bit(NFS_CS_REUSEPORT, &cl_init.init_flags);
else if (proto == XPRT_TRANSPORT_TCP)
cl_init.nconnect = nconnect;
if (server->flags & NFS_MOUNT_NORESVPORT)
set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
__set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
if (server->options & NFS_OPTION_MIGRATION)
set_bit(NFS_CS_MIGRATION, &cl_init.init_flags);
__set_bit(NFS_CS_MIGRATION, &cl_init.init_flags);
if (test_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status))
set_bit(NFS_CS_TSM_POSSIBLE, &cl_init.init_flags);
__set_bit(NFS_CS_TSM_POSSIBLE, &cl_init.init_flags);
server->port = rpc_get_port(addr);
/* Allocate or find a client reference we can use */


@@ -133,14 +133,55 @@ static ssize_t __nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
size_t count, unsigned int flags)
{
struct nfs42_copy_notify_res *cn_resp = NULL;
struct nl4_server *nss = NULL;
nfs4_stateid *cnrs = NULL;
ssize_t ret;
bool sync = false;
/* Only offload copy if superblock is the same */
if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
if (file_in->f_op != &nfs4_file_operations)
return -EXDEV;
if (!nfs_server_capable(file_inode(file_out), NFS_CAP_COPY))
return -EOPNOTSUPP;
if (file_inode(file_in) == file_inode(file_out))
return -EOPNOTSUPP;
return nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count);
/* if the copy size is smaller than 2 RPC payloads, make it
* synchronous
*/
if (count <= 2 * NFS_SERVER(file_inode(file_in))->rsize)
sync = true;
retry:
if (!nfs42_files_from_same_server(file_in, file_out)) {
/* for inter copy, if the copy size is smaller than 12 RPC
* payloads, fall back to traditional copy. There are
* 14 RPCs during an NFSv4.x mount between source/dest
* servers.
*/
if (sync ||
count <= 14 * NFS_SERVER(file_inode(file_in))->rsize)
return -EOPNOTSUPP;
cn_resp = kzalloc(sizeof(struct nfs42_copy_notify_res),
GFP_NOFS);
if (unlikely(cn_resp == NULL))
return -ENOMEM;
ret = nfs42_proc_copy_notify(file_in, file_out, cn_resp);
if (ret) {
ret = -EOPNOTSUPP;
goto out;
}
nss = &cn_resp->cnr_src;
cnrs = &cn_resp->cnr_stateid;
}
ret = nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count,
nss, cnrs, sync);
out:
if (!nfs42_files_from_same_server(file_in, file_out))
kfree(cn_resp);
if (ret == -EAGAIN)
goto retry;
return ret;
}
static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
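
The retry loop above encodes two size heuristics. A copy of at most
two read-size payloads is issued synchronously, and an inter-server
copy is only attempted once it outweighs the roughly fourteen RPCs of
setup between the two servers; below that, returning -EOPNOTSUPP lets
the VFS fall back to an ordinary read/write copy. Isolated, with rsize
as a parameter (a common rsize is 1 MiB, making the cutoffs 2 MiB and
14 MiB):

#include <stdbool.h>
#include <stddef.h>

/* count <= 2 * rsize: do the copy synchronously. */
static bool copy_is_sync(size_t count, size_t rsize)
{
    return count <= 2 * rsize;
}

/* An inter-server copy only pays off above ~14 RPC payloads. */
static bool inter_copy_worthwhile(size_t count, size_t rsize)
{
    return count > 14 * rsize;
}
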
@@ -263,6 +304,102 @@ out_unlock:
out:
return ret < 0 ? ret : count;
}
static int read_name_gen = 1;
#define SSC_READ_NAME_BODY "ssc_read_%d"
struct file *
nfs42_ssc_open(struct vfsmount *ss_mnt, struct nfs_fh *src_fh,
nfs4_stateid *stateid)
{
struct nfs_fattr fattr;
struct file *filep, *res;
struct nfs_server *server;
struct inode *r_ino = NULL;
struct nfs_open_context *ctx;
struct nfs4_state_owner *sp;
char *read_name = NULL;
int len, status = 0;
server = NFS_SERVER(ss_mnt->mnt_root->d_inode);
nfs_fattr_init(&fattr);
status = nfs4_proc_getattr(server, src_fh, &fattr, NULL, NULL);
if (status < 0) {
res = ERR_PTR(status);
goto out;
}
res = ERR_PTR(-ENOMEM);
len = strlen(SSC_READ_NAME_BODY) + 16;
read_name = kzalloc(len, GFP_NOFS);
if (read_name == NULL)
goto out;
snprintf(read_name, len, SSC_READ_NAME_BODY, read_name_gen++);
r_ino = nfs_fhget(ss_mnt->mnt_root->d_inode->i_sb, src_fh, &fattr,
NULL);
if (IS_ERR(r_ino)) {
res = ERR_CAST(r_ino);
goto out_free_name;
}
filep = alloc_file_pseudo(r_ino, ss_mnt, read_name, FMODE_READ,
r_ino->i_fop);
if (IS_ERR(filep)) {
res = ERR_CAST(filep);
goto out_free_name;
}
filep->f_mode |= FMODE_READ;
ctx = alloc_nfs_open_context(filep->f_path.dentry, filep->f_mode,
filep);
if (IS_ERR(ctx)) {
res = ERR_CAST(ctx);
goto out_filep;
}
res = ERR_PTR(-EINVAL);
sp = nfs4_get_state_owner(server, ctx->cred, GFP_KERNEL);
if (sp == NULL)
goto out_ctx;
ctx->state = nfs4_get_open_state(r_ino, sp);
if (ctx->state == NULL)
goto out_stateowner;
set_bit(NFS_SRV_SSC_COPY_STATE, &ctx->state->flags);
set_bit(NFS_OPEN_STATE, &ctx->state->flags);
memcpy(&ctx->state->open_stateid.other, &stateid->other,
NFS4_STATEID_OTHER_SIZE);
update_open_stateid(ctx->state, stateid, NULL, filep->f_mode);
nfs_file_set_open_context(filep, ctx);
put_nfs_open_context(ctx);
file_ra_state_init(&filep->f_ra, filep->f_mapping->host->i_mapping);
res = filep;
out_free_name:
kfree(read_name);
out:
return res;
out_stateowner:
nfs4_put_state_owner(sp);
out_ctx:
put_nfs_open_context(ctx);
out_filep:
fput(filep);
goto out_free_name;
}
EXPORT_SYMBOL_GPL(nfs42_ssc_open);
void nfs42_ssc_close(struct file *filep)
{
struct nfs_open_context *ctx = nfs_file_open_context(filep);
ctx->state->flags = 0;
}
EXPORT_SYMBOL_GPL(nfs42_ssc_close);
#endif /* CONFIG_NFS_V4_2 */
const struct file_operations nfs4_file_operations = {


@@ -91,7 +91,6 @@ struct nfs4_opendata;
static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr);
static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *label, struct inode *inode);
static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label, struct inode *inode);
static int nfs4_do_setattr(struct inode *inode, const struct cred *cred,
struct nfs_fattr *fattr, struct iattr *sattr,
@ -476,6 +475,7 @@ static int nfs4_do_handle_exception(struct nfs_server *server,
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_EXPIRED:
case -NFS4ERR_BAD_STATEID:
case -NFS4ERR_PARTNER_NO_AUTH:
if (inode != NULL && stateid != NULL) {
nfs_inode_find_state_and_recover(inode,
stateid);
@ -521,9 +521,7 @@ static int nfs4_do_handle_exception(struct nfs_server *server,
case -NFS4ERR_DEADSESSION:
case -NFS4ERR_SEQ_FALSE_RETRY:
case -NFS4ERR_SEQ_MISORDERED:
dprintk("%s ERROR: %d Reset session\n", __func__,
errorcode);
nfs4_schedule_session_recovery(clp->cl_session, errorcode);
/* Handled in nfs41_sequence_process() */
goto wait_on_recovery;
#endif /* defined(CONFIG_NFS_V4_1) */
case -NFS4ERR_FILE_OPEN:
@ -782,6 +780,7 @@ static int nfs41_sequence_process(struct rpc_task *task,
struct nfs4_session *session;
struct nfs4_slot *slot = res->sr_slot;
struct nfs_client *clp;
int status;
int ret = 1;
if (slot == NULL)
@ -793,8 +792,13 @@ static int nfs41_sequence_process(struct rpc_task *task,
session = slot->table->session;
trace_nfs4_sequence_done(session, res);
status = res->sr_status;
if (task->tk_status == -NFS4ERR_DEADSESSION)
status = -NFS4ERR_DEADSESSION;
/* Check the SEQUENCE operation status */
switch (res->sr_status) {
switch (status) {
case 0:
/* Mark this sequence number as having been acked */
nfs4_slot_sequence_acked(slot, slot->seq_nr);
@ -866,6 +870,10 @@ static int nfs41_sequence_process(struct rpc_task *task,
*/
slot->seq_nr = slot->seq_nr_highest_sent;
goto out_retry;
case -NFS4ERR_BADSESSION:
case -NFS4ERR_DEADSESSION:
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
goto session_recover;
default:
/* Just update the slot sequence no. */
slot->seq_done = 1;
@ -876,8 +884,10 @@ out:
out_noaction:
return ret;
session_recover:
nfs4_schedule_session_recovery(session, res->sr_status);
goto retry_nowait;
nfs4_schedule_session_recovery(session, status);
dprintk("%s ERROR: %d Reset session\n", __func__, status);
nfs41_sequence_free_slot(res);
goto out;
retry_new_seq:
++slot->seq_nr;
retry_nowait:
@ -1716,7 +1726,7 @@ static void nfs_state_clear_delegation(struct nfs4_state *state)
write_sequnlock(&state->seqlock);
}
static int update_open_stateid(struct nfs4_state *state,
int update_open_stateid(struct nfs4_state *state,
const nfs4_stateid *open_stateid,
const nfs4_stateid *delegation,
fmode_t fmode)
@ -1737,7 +1747,7 @@ static int update_open_stateid(struct nfs4_state *state,
ret = 1;
}
deleg_cur = rcu_dereference(nfsi->delegation);
deleg_cur = nfs4_get_valid_delegation(state->inode);
if (deleg_cur == NULL)
goto no_delegation;
@ -1749,7 +1759,7 @@ static int update_open_stateid(struct nfs4_state *state,
if (delegation == NULL)
delegation = &deleg_cur->stateid;
else if (!nfs4_stateid_match(&deleg_cur->stateid, delegation))
else if (!nfs4_stateid_match_other(&deleg_cur->stateid, delegation))
goto no_delegation_unlock;
nfs_mark_delegation_referenced(deleg_cur);
@ -1796,7 +1806,7 @@ static void nfs4_return_incompatible_delegation(struct inode *inode, fmode_t fmo
fmode &= FMODE_READ|FMODE_WRITE;
rcu_read_lock();
delegation = rcu_dereference(NFS_I(inode)->delegation);
delegation = nfs4_get_valid_delegation(inode);
if (delegation == NULL || (delegation->type & fmode) == fmode) {
rcu_read_unlock();
return;
@ -2188,7 +2198,6 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct
case -NFS4ERR_BAD_HIGH_SLOT:
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
case -NFS4ERR_DEADSESSION:
nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);
return -EAGAIN;
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_STALE_STATEID:
@ -4062,7 +4071,7 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
}
static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fattr *fattr, struct nfs4_label *label,
struct inode *inode)
{
@ -5098,12 +5107,12 @@ static bool nfs4_stateid_is_current(nfs4_stateid *stateid,
const struct nfs_lock_context *l_ctx,
fmode_t fmode)
{
nfs4_stateid current_stateid;
nfs4_stateid _current_stateid;
/* If the current stateid represents a lost lock, then exit */
if (nfs4_set_rw_stateid(&current_stateid, ctx, l_ctx, fmode) == -EIO)
if (nfs4_set_rw_stateid(&_current_stateid, ctx, l_ctx, fmode) == -EIO)
return true;
return nfs4_stateid_match(stateid, &current_stateid);
return nfs4_stateid_match(stateid, &_current_stateid);
}
static bool nfs4_error_stateid_expired(int err)
@ -6196,10 +6205,13 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
task->tk_status = 0;
break;
case -NFS4ERR_OLD_STATEID:
if (nfs4_refresh_delegation_stateid(&data->stateid, data->inode))
goto out_restart;
task->tk_status = 0;
break;
if (!nfs4_refresh_delegation_stateid(&data->stateid, data->inode))
nfs4_stateid_seqid_inc(&data->stateid);
if (data->args.bitmask) {
data->args.bitmask = NULL;
data->res.fattr = NULL;
}
goto out_restart;
case -NFS4ERR_ACCESS:
if (data->args.bitmask) {
data->args.bitmask = NULL;
@ -6214,6 +6226,7 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
if (exception.retry)
goto out_restart;
}
nfs_delegation_mark_returned(data->inode, data->args.stateid);
data->rpc_status = task->tk_status;
return;
out_restart:
@ -6243,8 +6256,10 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data)
d_data = (struct nfs4_delegreturndata *)data;
if (!d_data->lr.roc && nfs4_wait_on_layoutreturn(d_data->inode, task))
if (!d_data->lr.roc && nfs4_wait_on_layoutreturn(d_data->inode, task)) {
nfs4_sequence_done(task, &d_data->res.seq_res);
return;
}
lo = d_data->args.lr_args ? d_data->args.lr_args->layout : NULL;
if (lo && !pnfs_layout_is_valid(lo)) {
@ -7820,6 +7835,15 @@ nfs41_same_server_scope(struct nfs41_server_scope *a,
static void
nfs4_bind_one_conn_to_session_done(struct rpc_task *task, void *calldata)
{
struct nfs41_bind_conn_to_session_args *args = task->tk_msg.rpc_argp;
struct nfs_client *clp = args->client;
switch (task->tk_status) {
case -NFS4ERR_BADSESSION:
case -NFS4ERR_DEADSESSION:
nfs4_schedule_session_recovery(clp->cl_session,
task->tk_status);
}
}
static const struct rpc_call_ops nfs4_bind_one_conn_to_session_ops = {
@ -8867,8 +8891,6 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf
case -NFS4ERR_BADSESSION:
case -NFS4ERR_DEADSESSION:
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
nfs4_schedule_session_recovery(clp->cl_session,
task->tk_status);
break;
default:
nfs4_schedule_lease_recovery(clp);
@ -9897,6 +9919,7 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = {
| NFS_CAP_ALLOCATE
| NFS_CAP_COPY
| NFS_CAP_OFFLOAD_CANCEL
| NFS_CAP_COPY_NOTIFY
| NFS_CAP_DEALLOCATE
| NFS_CAP_SEEK
| NFS_CAP_LAYOUTSTATS


@ -60,6 +60,7 @@
#include "nfs4session.h"
#include "pnfs.h"
#include "netns.h"
#include "nfs4trace.h"
#define NFSDBG_FACILITY NFSDBG_STATE
@ -1407,7 +1408,7 @@ nfs_state_find_lock_state_by_stateid(struct nfs4_state *state,
list_for_each_entry(pos, &state->lock_states, ls_locks) {
if (!test_bit(NFS_LOCK_INITIALIZED, &pos->ls_flags))
continue;
if (nfs4_stateid_match_other(&pos->ls_stateid, stateid))
if (nfs4_stateid_match_or_older(&pos->ls_stateid, stateid))
return pos;
}
return NULL;
@ -1441,12 +1442,13 @@ void nfs_inode_find_state_and_recover(struct inode *inode,
state = ctx->state;
if (state == NULL)
continue;
if (nfs4_stateid_match_other(&state->stateid, stateid) &&
if (nfs4_stateid_match_or_older(&state->stateid, stateid) &&
nfs4_state_mark_reclaim_nograce(clp, state)) {
found = true;
continue;
}
if (nfs4_stateid_match_other(&state->open_stateid, stateid) &&
if (test_bit(NFS_OPEN_STATE, &state->flags) &&
nfs4_stateid_match_or_older(&state->open_stateid, stateid) &&
nfs4_state_mark_reclaim_nograce(clp, state)) {
found = true;
continue;
@ -1556,16 +1558,32 @@ static void nfs42_complete_copies(struct nfs4_state_owner *sp, struct nfs4_state
{
struct nfs4_copy_state *copy;
if (!test_bit(NFS_CLNT_DST_SSC_COPY_STATE, &state->flags))
if (!test_bit(NFS_CLNT_DST_SSC_COPY_STATE, &state->flags) &&
!test_bit(NFS_CLNT_SRC_SSC_COPY_STATE, &state->flags))
return;
spin_lock(&sp->so_server->nfs_client->cl_lock);
list_for_each_entry(copy, &sp->so_server->ss_copies, copies) {
if (!nfs4_stateid_match_other(&state->stateid, &copy->parent_state->stateid))
continue;
if ((test_bit(NFS_CLNT_DST_SSC_COPY_STATE, &state->flags) &&
!nfs4_stateid_match_other(&state->stateid,
&copy->parent_dst_state->stateid)))
continue;
copy->flags = 1;
complete(&copy->completion);
break;
if (test_and_clear_bit(NFS_CLNT_DST_SSC_COPY_STATE,
&state->flags)) {
clear_bit(NFS_CLNT_SRC_SSC_COPY_STATE, &state->flags);
complete(&copy->completion);
}
}
list_for_each_entry(copy, &sp->so_server->ss_copies, src_copies) {
if ((test_bit(NFS_CLNT_SRC_SSC_COPY_STATE, &state->flags) &&
!nfs4_stateid_match_other(&state->stateid,
&copy->parent_src_state->stateid)))
continue;
copy->flags = 1;
if (test_and_clear_bit(NFS_CLNT_DST_SSC_COPY_STATE,
&state->flags))
complete(&copy->completion);
}
spin_unlock(&sp->so_server->nfs_client->cl_lock);
}
@ -1593,6 +1611,7 @@ static int __nfs4_reclaim_open_state(struct nfs4_state_owner *sp, struct nfs4_st
if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) {
spin_lock(&state->state_lock);
list_for_each_entry(lock, &state->lock_states, ls_locks) {
trace_nfs4_state_lock_reclaim(state, lock);
if (!test_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags))
pr_warn_ratelimited("NFS: %s: Lock reclaim failed!\n", __func__);
}
@ -1609,6 +1628,9 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs
struct nfs4_state *state;
unsigned int loop = 0;
int status = 0;
#ifdef CONFIG_NFS_V4_2
bool found_ssc_copy_state = false;
#endif /* CONFIG_NFS_V4_2 */
/* Note: we rely on the sp->so_states list being ordered
* so that we always reclaim open(O_RDWR) and/or open(O_WRITE)
@ -1628,6 +1650,13 @@ restart:
continue;
if (state->state == 0)
continue;
#ifdef CONFIG_NFS_V4_2
if (test_bit(NFS_SRV_SSC_COPY_STATE, &state->flags)) {
nfs4_state_mark_recovery_failed(state, -EIO);
found_ssc_copy_state = true;
continue;
}
#endif /* CONFIG_NFS_V4_2 */
refcount_inc(&state->count);
spin_unlock(&sp->so_lock);
status = __nfs4_reclaim_open_state(sp, state, ops);
@ -1682,6 +1711,10 @@ restart:
}
raw_write_seqcount_end(&sp->so_reclaim_seqcount);
spin_unlock(&sp->so_lock);
#ifdef CONFIG_NFS_V4_2
if (found_ssc_copy_state)
return -EIO;
#endif /* CONFIG_NFS_V4_2 */
return 0;
out_err:
nfs4_put_open_state(state);
@ -2508,6 +2541,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
/* Ensure exclusive access to NFSv4 state */
do {
trace_nfs4_state_mgr(clp);
clear_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) {
section = "purge state";
@ -2621,6 +2655,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
out_error:
if (strlen(section))
section_sep = ": ";
trace_nfs4_state_mgr_failed(clp, section, status);
pr_warn_ratelimited("NFS: state manager%s%s failed on NFSv4 server %s"
" with error %d\n", section_sep, section,
clp->cl_hostname, -status);


@ -92,8 +92,8 @@ static void nfs4_evict_inode(struct inode *inode)
{
truncate_inode_pages_final(&inode->i_data);
clear_inode(inode);
/* If we are holding a delegation, return it! */
nfs_inode_return_delegation_noreclaim(inode);
/* If we are holding a delegation, return and free it */
nfs_inode_evict_delegation(inode);
/* Note that above delegreturn would trigger pnfs return-on-close */
pnfs_return_layout(inode);
pnfs_destroy_layout(NFS_I(inode));


@ -562,6 +562,99 @@ TRACE_EVENT(nfs4_setup_sequence,
)
);
TRACE_DEFINE_ENUM(NFS4CLNT_MANAGER_RUNNING);
TRACE_DEFINE_ENUM(NFS4CLNT_CHECK_LEASE);
TRACE_DEFINE_ENUM(NFS4CLNT_LEASE_EXPIRED);
TRACE_DEFINE_ENUM(NFS4CLNT_RECLAIM_REBOOT);
TRACE_DEFINE_ENUM(NFS4CLNT_RECLAIM_NOGRACE);
TRACE_DEFINE_ENUM(NFS4CLNT_DELEGRETURN);
TRACE_DEFINE_ENUM(NFS4CLNT_SESSION_RESET);
TRACE_DEFINE_ENUM(NFS4CLNT_LEASE_CONFIRM);
TRACE_DEFINE_ENUM(NFS4CLNT_SERVER_SCOPE_MISMATCH);
TRACE_DEFINE_ENUM(NFS4CLNT_PURGE_STATE);
TRACE_DEFINE_ENUM(NFS4CLNT_BIND_CONN_TO_SESSION);
TRACE_DEFINE_ENUM(NFS4CLNT_MOVED);
TRACE_DEFINE_ENUM(NFS4CLNT_LEASE_MOVED);
TRACE_DEFINE_ENUM(NFS4CLNT_DELEGATION_EXPIRED);
TRACE_DEFINE_ENUM(NFS4CLNT_RUN_MANAGER);
TRACE_DEFINE_ENUM(NFS4CLNT_DELEGRETURN_RUNNING);
#define show_nfs4_clp_state(state) \
__print_flags(state, "|", \
{ NFS4CLNT_MANAGER_RUNNING, "MANAGER_RUNNING" }, \
{ NFS4CLNT_CHECK_LEASE, "CHECK_LEASE" }, \
{ NFS4CLNT_LEASE_EXPIRED, "LEASE_EXPIRED" }, \
{ NFS4CLNT_RECLAIM_REBOOT, "RECLAIM_REBOOT" }, \
{ NFS4CLNT_RECLAIM_NOGRACE, "RECLAIM_NOGRACE" }, \
{ NFS4CLNT_DELEGRETURN, "DELEGRETURN" }, \
{ NFS4CLNT_SESSION_RESET, "SESSION_RESET" }, \
{ NFS4CLNT_LEASE_CONFIRM, "LEASE_CONFIRM" }, \
{ NFS4CLNT_SERVER_SCOPE_MISMATCH, \
"SERVER_SCOPE_MISMATCH" }, \
{ NFS4CLNT_PURGE_STATE, "PURGE_STATE" }, \
{ NFS4CLNT_BIND_CONN_TO_SESSION, \
"BIND_CONN_TO_SESSION" }, \
{ NFS4CLNT_MOVED, "MOVED" }, \
{ NFS4CLNT_LEASE_MOVED, "LEASE_MOVED" }, \
{ NFS4CLNT_DELEGATION_EXPIRED, "DELEGATION_EXPIRED" }, \
{ NFS4CLNT_RUN_MANAGER, "RUN_MANAGER" }, \
{ NFS4CLNT_DELEGRETURN_RUNNING, "DELEGRETURN_RUNNING" })
TRACE_EVENT(nfs4_state_mgr,
TP_PROTO(
const struct nfs_client *clp
),
TP_ARGS(clp),
TP_STRUCT__entry(
__field(unsigned long, state)
__string(hostname, clp->cl_hostname)
),
TP_fast_assign(
__entry->state = clp->cl_state;
__assign_str(hostname, clp->cl_hostname);
),
TP_printk(
"hostname=%s clp state=%s", __get_str(hostname),
show_nfs4_clp_state(__entry->state)
)
)
TRACE_EVENT(nfs4_state_mgr_failed,
TP_PROTO(
const struct nfs_client *clp,
const char *section,
int status
),
TP_ARGS(clp, section, status),
TP_STRUCT__entry(
__field(unsigned long, error)
__field(unsigned long, state)
__string(hostname, clp->cl_hostname)
__string(section, section)
),
TP_fast_assign(
__entry->error = status;
__entry->state = clp->cl_state;
__assign_str(hostname, clp->cl_hostname);
__assign_str(section, section);
),
TP_printk(
"hostname=%s clp state=%s error=%ld (%s) section=%s",
__get_str(hostname),
show_nfs4_clp_state(__entry->state), -__entry->error,
show_nfsv4_errors(__entry->error), __get_str(section)
)
)
TRACE_EVENT(nfs4_xdr_status,
TP_PROTO(
const struct xdr_stream *xdr,
@ -929,6 +1022,88 @@ TRACE_EVENT(nfs4_set_lock,
)
);
TRACE_DEFINE_ENUM(LK_STATE_IN_USE);
TRACE_DEFINE_ENUM(NFS_DELEGATED_STATE);
TRACE_DEFINE_ENUM(NFS_OPEN_STATE);
TRACE_DEFINE_ENUM(NFS_O_RDONLY_STATE);
TRACE_DEFINE_ENUM(NFS_O_WRONLY_STATE);
TRACE_DEFINE_ENUM(NFS_O_RDWR_STATE);
TRACE_DEFINE_ENUM(NFS_STATE_RECLAIM_REBOOT);
TRACE_DEFINE_ENUM(NFS_STATE_RECLAIM_NOGRACE);
TRACE_DEFINE_ENUM(NFS_STATE_POSIX_LOCKS);
TRACE_DEFINE_ENUM(NFS_STATE_RECOVERY_FAILED);
TRACE_DEFINE_ENUM(NFS_STATE_MAY_NOTIFY_LOCK);
TRACE_DEFINE_ENUM(NFS_STATE_CHANGE_WAIT);
TRACE_DEFINE_ENUM(NFS_CLNT_DST_SSC_COPY_STATE);
TRACE_DEFINE_ENUM(NFS_CLNT_SRC_SSC_COPY_STATE);
TRACE_DEFINE_ENUM(NFS_SRV_SSC_COPY_STATE);
#define show_nfs4_state_flags(flags) \
__print_flags(flags, "|", \
{ LK_STATE_IN_USE, "IN_USE" }, \
{ NFS_DELEGATED_STATE, "DELEGATED" }, \
{ NFS_OPEN_STATE, "OPEN" }, \
{ NFS_O_RDONLY_STATE, "O_RDONLY" }, \
{ NFS_O_WRONLY_STATE, "O_WRONLY" }, \
{ NFS_O_RDWR_STATE, "O_RDWR" }, \
{ NFS_STATE_RECLAIM_REBOOT, "RECLAIM_REBOOT" }, \
{ NFS_STATE_RECLAIM_NOGRACE, "RECLAIM_NOGRACE" }, \
{ NFS_STATE_POSIX_LOCKS, "POSIX_LOCKS" }, \
{ NFS_STATE_RECOVERY_FAILED, "RECOVERY_FAILED" }, \
{ NFS_STATE_MAY_NOTIFY_LOCK, "MAY_NOTIFY_LOCK" }, \
{ NFS_STATE_CHANGE_WAIT, "CHANGE_WAIT" }, \
{ NFS_CLNT_DST_SSC_COPY_STATE, "CLNT_DST_SSC_COPY" }, \
{ NFS_CLNT_SRC_SSC_COPY_STATE, "CLNT_SRC_SSC_COPY" }, \
{ NFS_SRV_SSC_COPY_STATE, "SRV_SSC_COPY" })
#define show_nfs4_lock_flags(flags) \
__print_flags(flags, "|", \
{ BIT(NFS_LOCK_INITIALIZED), "INITIALIZED" }, \
{ BIT(NFS_LOCK_LOST), "LOST" })
TRACE_EVENT(nfs4_state_lock_reclaim,
TP_PROTO(
const struct nfs4_state *state,
const struct nfs4_lock_state *lock
),
TP_ARGS(state, lock),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(u32, fhandle)
__field(u64, fileid)
__field(unsigned long, state_flags)
__field(unsigned long, lock_flags)
__field(int, stateid_seq)
__field(u32, stateid_hash)
),
TP_fast_assign(
const struct inode *inode = state->inode;
__entry->dev = inode->i_sb->s_dev;
__entry->fileid = NFS_FILEID(inode);
__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
__entry->state_flags = state->flags;
__entry->lock_flags = lock->ls_flags;
__entry->stateid_seq =
be32_to_cpu(state->stateid.seqid);
__entry->stateid_hash =
nfs_stateid_hash(&state->stateid);
),
TP_printk(
"fileid=%02x:%02x:%llu fhandle=0x%08x "
"stateid=%d:0x%08x state_flags=%s lock_flags=%s",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid, __entry->fhandle,
__entry->stateid_seq, __entry->stateid_hash,
show_nfs4_state_flags(__entry->state_flags),
show_nfs4_lock_flags(__entry->lock_flags)
)
)
DECLARE_EVENT_CLASS(nfs4_set_delegation_event,
TP_PROTO(
const struct inode *inode,


@ -1059,7 +1059,7 @@ static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *ve
}
static __be32 *
xdr_encode_nfstime4(__be32 *p, const struct timespec *t)
xdr_encode_nfstime4(__be32 *p, const struct timespec64 *t)
{
p = xdr_encode_hyper(p, (__s64)t->tv_sec);
*p++ = cpu_to_be32(t->tv_nsec);
@ -1072,7 +1072,6 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
const struct nfs_server *server,
const uint32_t attrmask[])
{
struct timespec ts;
char owner_name[IDMAP_NAMESZ];
char owner_group[IDMAP_NAMESZ];
int owner_namelen = 0;
@ -1161,16 +1160,14 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) {
if (iap->ia_valid & ATTR_ATIME_SET) {
*p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
ts = timespec64_to_timespec(iap->ia_atime);
p = xdr_encode_nfstime4(p, &ts);
p = xdr_encode_nfstime4(p, &iap->ia_atime);
} else
*p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME);
}
if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) {
if (iap->ia_valid & ATTR_MTIME_SET) {
*p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
ts = timespec64_to_timespec(iap->ia_mtime);
p = xdr_encode_nfstime4(p, &ts);
p = xdr_encode_nfstime4(p, &iap->ia_mtime);
} else
*p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME);
}
@ -4065,17 +4062,17 @@ static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint
}
static __be32 *
xdr_decode_nfstime4(__be32 *p, struct timespec *t)
xdr_decode_nfstime4(__be32 *p, struct timespec64 *t)
{
__u64 sec;
p = xdr_decode_hyper(p, &sec);
t->tv_sec = (time_t)sec;
t->tv_sec = sec;
t->tv_nsec = be32_to_cpup(p++);
return p;
}
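On the wire, nfstime4 is three XDR words: a big-endian signed 64-bit seconds field followed by an unsigned 32-bit nseconds field (RFC 7530), which is why these helpers now map directly onto struct timespec64 without an intermediate timespec conversion. A minimal decode sketch, with the buffer name hypothetical:
	struct timespec64 ts;
	__be32 *p = raw_nfstime4_words;	/* 12 bytes received off the wire */

	p = xdr_decode_nfstime4(p, &ts);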
static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time)
static int decode_attr_time(struct xdr_stream *xdr, struct timespec64 *time)
{
__be32 *p;
@ -4086,7 +4083,7 @@ static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time)
return 0;
}
static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time)
static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec64 *time)
{
int status = 0;
@ -4104,7 +4101,7 @@ static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, str
return status;
}
static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time)
static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec64 *time)
{
int status = 0;
@ -4123,7 +4120,7 @@ static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, s
}
static int decode_attr_time_delta(struct xdr_stream *xdr, uint32_t *bitmap,
struct timespec *time)
struct timespec64 *time)
{
int status = 0;
@ -4186,7 +4183,7 @@ static int decode_attr_security_label(struct xdr_stream *xdr, uint32_t *bitmap,
return status;
}
static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time)
static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec64 *time)
{
int status = 0;
@ -7581,6 +7578,7 @@ const struct rpc_procinfo nfs4_procedures[] = {
PROC42(CLONE, enc_clone, dec_clone),
PROC42(COPY, enc_copy, dec_copy),
PROC42(OFFLOAD_CANCEL, enc_offload_cancel, dec_offload_cancel),
PROC42(COPY_NOTIFY, enc_copy_notify, dec_copy_notify),
PROC(LOOKUPP, enc_lookupp, dec_lookupp),
PROC42(LAYOUTERROR, enc_layouterror, dec_layouterror),
};


@ -1065,6 +1065,39 @@ TRACE_EVENT(nfs_commit_done,
)
);
TRACE_EVENT(nfs_fh_to_dentry,
TP_PROTO(
const struct super_block *sb,
const struct nfs_fh *fh,
u64 fileid,
int error
),
TP_ARGS(sb, fh, fileid, error),
TP_STRUCT__entry(
__field(int, error)
__field(dev_t, dev)
__field(u32, fhandle)
__field(u64, fileid)
),
TP_fast_assign(
__entry->error = error;
__entry->dev = sb->s_dev;
__entry->fileid = fileid;
__entry->fhandle = nfs_fhandle_hash(fh);
),
TP_printk(
"error=%d fileid=%02x:%02x:%llu fhandle=0x%08x ",
__entry->error,
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle
)
);
TRACE_DEFINE_ENUM(NFS_OK);
TRACE_DEFINE_ENUM(NFSERR_PERM);
TRACE_DEFINE_ENUM(NFSERR_NOENT);


@ -2160,8 +2160,6 @@ out_unlock:
return NULL;
}
extern const nfs4_stateid current_stateid;
static void _lgopen_prepare_attached(struct nfs4_opendata *data,
struct nfs_open_context *ctx)
{


@ -1592,7 +1592,7 @@ static int nfs_parse_mount_options(char *raw,
dfprintk(MOUNT, "NFS: invalid "
"lookupcache argument\n");
return 0;
};
}
break;
case Opt_fscache_uniq:
if (nfs_get_option_str(args, &mnt->fscache_uniq))
@ -1625,7 +1625,7 @@ static int nfs_parse_mount_options(char *raw,
dfprintk(MOUNT, "NFS: invalid "
"local_lock argument\n");
return 0;
};
}
break;
/*
@ -2585,7 +2585,7 @@ static void nfs_get_cache_cookie(struct super_block *sb,
if (mnt_s->fscache_key) {
uniq = mnt_s->fscache_key->key.uniquifier;
ulen = mnt_s->fscache_key->key.uniq_len;
};
}
} else
return;


@ -121,8 +121,7 @@ static void nfs_netns_client_release(struct kobject *kobj)
struct nfs_netns_client,
kobject);
if (c->identifier)
kfree(c->identifier);
kfree(c->identifier);
kfree(c);
}


@ -16,6 +16,7 @@
#include <linux/list.h>
#include <linux/uidgid.h>
#include <uapi/linux/nfs4.h>
#include <linux/sunrpc/msg_prot.h>
enum nfs4_acl_whotype {
NFS4_ACL_WHO_NAMED = 0,
@ -539,6 +540,8 @@ enum {
NFSPROC4_CLNT_LOOKUPP,
NFSPROC4_CLNT_LAYOUTERROR,
NFSPROC4_CLNT_COPY_NOTIFY,
};
/* nfs41 types */
@ -674,4 +677,27 @@ struct nfs4_op_map {
} u;
};
struct nfs42_netaddr {
char netid[RPCBIND_MAXNETIDLEN];
char addr[RPCBIND_MAXUADDRLEN + 1];
u32 netid_len;
u32 addr_len;
};
enum netloc_type4 {
NL4_NAME = 1,
NL4_URL = 2,
NL4_NETADDR = 3,
};
struct nl4_server {
enum netloc_type4 nl4_type;
union {
struct { /* NL4_NAME, NL4_URL */
int nl4_str_sz;
char nl4_str[NFS4_OPAQUE_LIMIT + 1];
};
struct nfs42_netaddr nl4_addr; /* NL4_NETADDR */
} u;
};
#endif
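A hypothetical sketch of populating an nl4_server for the NL4_NETADDR case, as a COPY_NOTIFY caller naming the source server might; the address is from the documentation range, and the trailing "8.1" encodes port 2049 in the RFC 5665 universal-address format:
	struct nl4_server nsrv = { .nl4_type = NL4_NETADDR };

	nsrv.u.nl4_addr.netid_len =
		scnprintf(nsrv.u.nl4_addr.netid,
			  sizeof(nsrv.u.nl4_addr.netid), "tcp");
	nsrv.u.nl4_addr.addr_len =
		scnprintf(nsrv.u.nl4_addr.addr,
			  sizeof(nsrv.u.nl4_addr.addr), "192.0.2.7.8.1");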


@ -189,13 +189,15 @@ struct nfs_inode {
struct nfs4_copy_state {
struct list_head copies;
struct list_head src_copies;
nfs4_stateid stateid;
struct completion completion;
uint64_t count;
struct nfs_writeverf verf;
int error;
int flags;
struct nfs4_state *parent_state;
struct nfs4_state *parent_src_state;
struct nfs4_state *parent_dst_state;
};
/*


@ -45,6 +45,9 @@ struct nfs_client {
#define NFS_CS_INFINITE_SLOTS 3 /* - don't limit TCP slots */
#define NFS_CS_NO_RETRANS_TIMEOUT 4 /* - Disable retransmit timeouts */
#define NFS_CS_TSM_POSSIBLE 5 /* - Maybe state migration */
#define NFS_CS_NOPING 6 /* - don't ping on connect */
#define NFS_CS_DS 7 /* - Server is a DS */
#define NFS_CS_REUSEPORT 8 /* - reuse src port on reconnect */
struct sockaddr_storage cl_addr; /* server identifier */
size_t cl_addrlen;
char * cl_hostname; /* hostname of server */
@ -171,7 +174,7 @@ struct nfs_server {
struct nfs_fsid fsid;
__u64 maxfilesize; /* maximum file size */
struct timespec time_delta; /* smallest time granularity */
struct timespec64 time_delta; /* smallest time granularity */
unsigned long mount_time; /* when this fs was mounted */
struct super_block *super; /* VFS super block */
dev_t s_dev; /* superblock dev numbers */
@ -276,5 +279,6 @@ struct nfs_server {
#define NFS_CAP_COPY (1U << 24)
#define NFS_CAP_OFFLOAD_CANCEL (1U << 25)
#define NFS_CAP_LAYOUTERROR (1U << 26)
#define NFS_CAP_COPY_NOTIFY (1U << 27)
#endif


@ -62,14 +62,14 @@ struct nfs_fattr {
struct nfs_fsid fsid;
__u64 fileid;
__u64 mounted_on_fileid;
struct timespec atime;
struct timespec mtime;
struct timespec ctime;
struct timespec64 atime;
struct timespec64 mtime;
struct timespec64 ctime;
__u64 change_attr; /* NFSv4 change attribute */
__u64 pre_change_attr;/* pre-op NFSv4 change attribute */
__u64 pre_size; /* pre_op_attr.size */
struct timespec pre_mtime; /* pre_op_attr.mtime */
struct timespec pre_ctime; /* pre_op_attr.ctime */
struct timespec64 pre_mtime; /* pre_op_attr.mtime */
struct timespec64 pre_ctime; /* pre_op_attr.ctime */
unsigned long time_start;
unsigned long gencount;
struct nfs4_string *owner_name;
@ -143,7 +143,7 @@ struct nfs_fsinfo {
__u32 wtmult; /* writes should be multiple of this */
__u32 dtpref; /* pref. readdir transfer size */
__u64 maxfilesize;
struct timespec time_delta; /* server time granularity */
struct timespec64 time_delta; /* server time granularity */
__u32 lease_time; /* in seconds */
__u32 nlayouttypes; /* number of layouttypes */
__u32 layouttype[NFS_MAX_LAYOUT_TYPES]; /* supported pnfs layout driver */
@ -869,7 +869,7 @@ struct nfs3_sattrargs {
struct nfs_fh * fh;
struct iattr * sattr;
unsigned int guard;
struct timespec guardtime;
struct timespec64 guardtime;
};
struct nfs3_diropargs {
@ -1435,6 +1435,7 @@ struct nfs42_copy_args {
u64 count;
bool sync;
struct nl4_server *cp_src;
};
struct nfs42_write_res {
@ -1463,6 +1464,22 @@ struct nfs42_offload_status_res {
int osr_status;
};
struct nfs42_copy_notify_args {
struct nfs4_sequence_args cna_seq_args;
struct nfs_fh *cna_src_fh;
nfs4_stateid cna_src_stateid;
struct nl4_server cna_dst;
};
struct nfs42_copy_notify_res {
struct nfs4_sequence_res cnr_seq_res;
struct nfstime4 cnr_lease_time;
nfs4_stateid cnr_stateid;
struct nl4_server cnr_src;
};
struct nfs42_seek_args {
struct nfs4_sequence_args seq_args;


@ -149,6 +149,7 @@ struct rpc_add_xprt_test {
#define RPC_CLNT_CREATE_NO_IDLE_TIMEOUT (1UL << 8)
#define RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT (1UL << 9)
#define RPC_CLNT_CREATE_SOFTERR (1UL << 10)
#define RPC_CLNT_CREATE_REUSEPORT (1UL << 11)
struct rpc_clnt *rpc_create(struct rpc_create_args *args);
struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *,


@ -207,7 +207,8 @@ struct rpc_xprt {
unsigned int min_reqs; /* min number of slots */
unsigned int num_reqs; /* total slots */
unsigned long state; /* transport state */
unsigned char resvport : 1; /* use a reserved port */
unsigned char resvport : 1, /* use a reserved port */
reuseport : 1; /* reuse port on reconnect */
atomic_t swapper; /* we're swapping over this
transport */
unsigned int bind_index; /* bind function index */


@ -85,6 +85,44 @@ DECLARE_EVENT_CLASS(xprtrdma_rxprt,
), \
TP_ARGS(r_xprt))
DECLARE_EVENT_CLASS(xprtrdma_connect_class,
TP_PROTO(
const struct rpcrdma_xprt *r_xprt,
int rc
),
TP_ARGS(r_xprt, rc),
TP_STRUCT__entry(
__field(const void *, r_xprt)
__field(int, rc)
__field(int, connect_status)
__string(addr, rpcrdma_addrstr(r_xprt))
__string(port, rpcrdma_portstr(r_xprt))
),
TP_fast_assign(
__entry->r_xprt = r_xprt;
__entry->rc = rc;
__entry->connect_status = r_xprt->rx_ep.rep_connected;
__assign_str(addr, rpcrdma_addrstr(r_xprt));
__assign_str(port, rpcrdma_portstr(r_xprt));
),
TP_printk("peer=[%s]:%s r_xprt=%p: rc=%d connect status=%d",
__get_str(addr), __get_str(port), __entry->r_xprt,
__entry->rc, __entry->connect_status
)
);
#define DEFINE_CONN_EVENT(name) \
DEFINE_EVENT(xprtrdma_connect_class, xprtrdma_##name, \
TP_PROTO( \
const struct rpcrdma_xprt *r_xprt, \
int rc \
), \
TP_ARGS(r_xprt, rc))
DECLARE_EVENT_CLASS(xprtrdma_rdch_event,
TP_PROTO(
const struct rpc_task *task,
@ -333,47 +371,81 @@ TRACE_EVENT(xprtrdma_cm_event,
)
);
TRACE_EVENT(xprtrdma_disconnect,
TRACE_EVENT(xprtrdma_inline_thresh,
TP_PROTO(
const struct rpcrdma_xprt *r_xprt,
int status
const struct rpcrdma_xprt *r_xprt
),
TP_ARGS(r_xprt, status),
TP_ARGS(r_xprt),
TP_STRUCT__entry(
__field(const void *, r_xprt)
__field(int, status)
__field(int, connected)
__field(unsigned int, inline_send)
__field(unsigned int, inline_recv)
__field(unsigned int, max_send)
__field(unsigned int, max_recv)
__string(addr, rpcrdma_addrstr(r_xprt))
__string(port, rpcrdma_portstr(r_xprt))
),
TP_fast_assign(
const struct rpcrdma_ep *ep = &r_xprt->rx_ep;
__entry->r_xprt = r_xprt;
__entry->inline_send = ep->rep_inline_send;
__entry->inline_recv = ep->rep_inline_recv;
__entry->max_send = ep->rep_max_inline_send;
__entry->max_recv = ep->rep_max_inline_recv;
__assign_str(addr, rpcrdma_addrstr(r_xprt));
__assign_str(port, rpcrdma_portstr(r_xprt));
),
TP_printk("peer=[%s]:%s r_xprt=%p neg send/recv=%u/%u, calc send/recv=%u/%u",
__get_str(addr), __get_str(port), __entry->r_xprt,
__entry->inline_send, __entry->inline_recv,
__entry->max_send, __entry->max_recv
)
);
DEFINE_CONN_EVENT(connect);
DEFINE_CONN_EVENT(disconnect);
DEFINE_RXPRT_EVENT(xprtrdma_create);
DEFINE_RXPRT_EVENT(xprtrdma_op_destroy);
DEFINE_RXPRT_EVENT(xprtrdma_remove);
DEFINE_RXPRT_EVENT(xprtrdma_reinsert);
DEFINE_RXPRT_EVENT(xprtrdma_op_inject_dsc);
DEFINE_RXPRT_EVENT(xprtrdma_op_close);
DEFINE_RXPRT_EVENT(xprtrdma_op_setport);
TRACE_EVENT(xprtrdma_op_connect,
TP_PROTO(
const struct rpcrdma_xprt *r_xprt,
unsigned long delay
),
TP_ARGS(r_xprt, delay),
TP_STRUCT__entry(
__field(const void *, r_xprt)
__field(unsigned long, delay)
__string(addr, rpcrdma_addrstr(r_xprt))
__string(port, rpcrdma_portstr(r_xprt))
),
TP_fast_assign(
__entry->r_xprt = r_xprt;
__entry->status = status;
__entry->connected = r_xprt->rx_ep.rep_connected;
__entry->delay = delay;
__assign_str(addr, rpcrdma_addrstr(r_xprt));
__assign_str(port, rpcrdma_portstr(r_xprt));
),
TP_printk("peer=[%s]:%s r_xprt=%p: status=%d %sconnected",
__get_str(addr), __get_str(port),
__entry->r_xprt, __entry->status,
__entry->connected == 1 ? "still " : "dis"
TP_printk("peer=[%s]:%s r_xprt=%p delay=%lu",
__get_str(addr), __get_str(port), __entry->r_xprt,
__entry->delay
)
);
DEFINE_RXPRT_EVENT(xprtrdma_conn_start);
DEFINE_RXPRT_EVENT(xprtrdma_conn_tout);
DEFINE_RXPRT_EVENT(xprtrdma_create);
DEFINE_RXPRT_EVENT(xprtrdma_op_destroy);
DEFINE_RXPRT_EVENT(xprtrdma_remove);
DEFINE_RXPRT_EVENT(xprtrdma_reinsert);
DEFINE_RXPRT_EVENT(xprtrdma_reconnect);
DEFINE_RXPRT_EVENT(xprtrdma_op_inject_dsc);
DEFINE_RXPRT_EVENT(xprtrdma_op_close);
DEFINE_RXPRT_EVENT(xprtrdma_op_connect);
TRACE_EVENT(xprtrdma_op_set_cto,
TP_PROTO(
@ -532,6 +604,8 @@ DEFINE_WRCH_EVENT(write);
DEFINE_WRCH_EVENT(reply);
TRACE_DEFINE_ENUM(rpcrdma_noch);
TRACE_DEFINE_ENUM(rpcrdma_noch_pullup);
TRACE_DEFINE_ENUM(rpcrdma_noch_mapped);
TRACE_DEFINE_ENUM(rpcrdma_readch);
TRACE_DEFINE_ENUM(rpcrdma_areadch);
TRACE_DEFINE_ENUM(rpcrdma_writech);
@ -540,6 +614,8 @@ TRACE_DEFINE_ENUM(rpcrdma_replych);
#define xprtrdma_show_chunktype(x) \
__print_symbolic(x, \
{ rpcrdma_noch, "inline" }, \
{ rpcrdma_noch_pullup, "pullup" }, \
{ rpcrdma_noch_mapped, "mapped" }, \
{ rpcrdma_readch, "read list" }, \
{ rpcrdma_areadch, "*read list" }, \
{ rpcrdma_writech, "write list" }, \
@ -667,9 +743,8 @@ TRACE_EVENT(xprtrdma_post_send,
__entry->client_id = rqst->rq_task->tk_client ?
rqst->rq_task->tk_client->cl_clid : -1;
__entry->req = req;
__entry->num_sge = req->rl_sendctx->sc_wr.num_sge;
__entry->signaled = req->rl_sendctx->sc_wr.send_flags &
IB_SEND_SIGNALED;
__entry->num_sge = req->rl_wr.num_sge;
__entry->signaled = req->rl_wr.send_flags & IB_SEND_SIGNALED;
__entry->status = status;
),
@ -735,6 +810,31 @@ TRACE_EVENT(xprtrdma_post_recvs,
)
);
TRACE_EVENT(xprtrdma_post_linv,
TP_PROTO(
const struct rpcrdma_req *req,
int status
),
TP_ARGS(req, status),
TP_STRUCT__entry(
__field(const void *, req)
__field(int, status)
__field(u32, xid)
),
TP_fast_assign(
__entry->req = req;
__entry->status = status;
__entry->xid = be32_to_cpu(req->rl_slot.rq_xid);
),
TP_printk("req=%p xid=0x%08x status=%d",
__entry->req, __entry->xid, __entry->status
)
);
/**
** Completion events
**/
@ -1021,66 +1121,32 @@ DEFINE_REPLY_EVENT(xprtrdma_reply_hdr);
TRACE_EVENT(xprtrdma_fixup,
TP_PROTO(
const struct rpc_rqst *rqst,
int len,
int hdrlen
unsigned long fixup
),
TP_ARGS(rqst, len, hdrlen),
TP_ARGS(rqst, fixup),
TP_STRUCT__entry(
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(const void *, base)
__field(int, len)
__field(int, hdrlen)
__field(unsigned long, fixup)
__field(size_t, headlen)
__field(unsigned int, pagelen)
__field(size_t, taillen)
),
TP_fast_assign(
__entry->task_id = rqst->rq_task->tk_pid;
__entry->client_id = rqst->rq_task->tk_client->cl_clid;
__entry->base = rqst->rq_rcv_buf.head[0].iov_base;
__entry->len = len;
__entry->hdrlen = hdrlen;
__entry->fixup = fixup;
__entry->headlen = rqst->rq_rcv_buf.head[0].iov_len;
__entry->pagelen = rqst->rq_rcv_buf.page_len;
__entry->taillen = rqst->rq_rcv_buf.tail[0].iov_len;
),
TP_printk("task:%u@%u base=%p len=%d hdrlen=%d",
__entry->task_id, __entry->client_id,
__entry->base, __entry->len, __entry->hdrlen
)
);
TRACE_EVENT(xprtrdma_fixup_pg,
TP_PROTO(
const struct rpc_rqst *rqst,
int pageno,
const void *pos,
int len,
int curlen
),
TP_ARGS(rqst, pageno, pos, len, curlen),
TP_STRUCT__entry(
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(const void *, pos)
__field(int, pageno)
__field(int, len)
__field(int, curlen)
),
TP_fast_assign(
__entry->task_id = rqst->rq_task->tk_pid;
__entry->client_id = rqst->rq_task->tk_client->cl_clid;
__entry->pos = pos;
__entry->pageno = pageno;
__entry->len = len;
__entry->curlen = curlen;
),
TP_printk("task:%u@%u pageno=%d pos=%p len=%d curlen=%d",
__entry->task_id, __entry->client_id,
__entry->pageno, __entry->pos, __entry->len, __entry->curlen
TP_printk("task:%u@%u fixup=%lu xdr=%zu/%u/%zu",
__entry->task_id, __entry->client_id, __entry->fixup,
__entry->headlen, __entry->pagelen, __entry->taillen
)
);


@ -165,6 +165,7 @@ DECLARE_EVENT_CLASS(rpc_task_running,
DEFINE_RPC_RUNNING_EVENT(begin);
DEFINE_RPC_RUNNING_EVENT(run_action);
DEFINE_RPC_RUNNING_EVENT(complete);
DEFINE_RPC_RUNNING_EVENT(end);
DECLARE_EVENT_CLASS(rpc_task_queued,
@ -777,6 +778,99 @@ TRACE_EVENT(xprt_ping,
__get_str(addr), __get_str(port), __entry->status)
);
DECLARE_EVENT_CLASS(xprt_writelock_event,
TP_PROTO(
const struct rpc_xprt *xprt, const struct rpc_task *task
),
TP_ARGS(xprt, task),
TP_STRUCT__entry(
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(unsigned int, snd_task_id)
),
TP_fast_assign(
if (task) {
__entry->task_id = task->tk_pid;
__entry->client_id = task->tk_client ?
task->tk_client->cl_clid : -1;
} else {
__entry->task_id = -1;
__entry->client_id = -1;
}
__entry->snd_task_id = xprt->snd_task ?
xprt->snd_task->tk_pid : -1;
),
TP_printk("task:%u@%u snd_task:%u",
__entry->task_id, __entry->client_id,
__entry->snd_task_id)
);
#define DEFINE_WRITELOCK_EVENT(name) \
DEFINE_EVENT(xprt_writelock_event, xprt_##name, \
TP_PROTO( \
const struct rpc_xprt *xprt, \
const struct rpc_task *task \
), \
TP_ARGS(xprt, task))
DEFINE_WRITELOCK_EVENT(reserve_xprt);
DEFINE_WRITELOCK_EVENT(release_xprt);
DECLARE_EVENT_CLASS(xprt_cong_event,
TP_PROTO(
const struct rpc_xprt *xprt, const struct rpc_task *task
),
TP_ARGS(xprt, task),
TP_STRUCT__entry(
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(unsigned int, snd_task_id)
__field(unsigned long, cong)
__field(unsigned long, cwnd)
__field(bool, wait)
),
TP_fast_assign(
if (task) {
__entry->task_id = task->tk_pid;
__entry->client_id = task->tk_client ?
task->tk_client->cl_clid : -1;
} else {
__entry->task_id = -1;
__entry->client_id = -1;
}
__entry->snd_task_id = xprt->snd_task ?
xprt->snd_task->tk_pid : -1;
__entry->cong = xprt->cong;
__entry->cwnd = xprt->cwnd;
__entry->wait = test_bit(XPRT_CWND_WAIT, &xprt->state);
),
TP_printk("task:%u@%u snd_task:%u cong=%lu cwnd=%lu%s",
__entry->task_id, __entry->client_id,
__entry->snd_task_id, __entry->cong, __entry->cwnd,
__entry->wait ? " (wait)" : "")
);
#define DEFINE_CONG_EVENT(name) \
DEFINE_EVENT(xprt_cong_event, xprt_##name, \
TP_PROTO( \
const struct rpc_xprt *xprt, \
const struct rpc_task *task \
), \
TP_ARGS(xprt, task))
DEFINE_CONG_EVENT(reserve_cong);
DEFINE_CONG_EVENT(release_cong);
DEFINE_CONG_EVENT(get_cong);
DEFINE_CONG_EVENT(put_cong);
TRACE_EVENT(xs_stream_read_data,
TP_PROTO(struct rpc_xprt *xprt, ssize_t err, size_t total),


@ -591,6 +591,9 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
xprt->resvport = 1;
if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT)
xprt->resvport = 0;
xprt->reuseport = 0;
if (args->flags & RPC_CLNT_CREATE_REUSEPORT)
xprt->reuseport = 1;
clnt = rpc_create_xprt(args, xprt);
if (IS_ERR(clnt) || args->nconnect <= 1)
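For context, a hedged sketch of a caller opting in to source-port reuse through the new flag; the argument list is abbreviated and illustrative only:
	struct rpc_create_args args = {
		.protocol	= XPRT_TRANSPORT_TCP,
		/* ... address, timeouts, authflavor ... */
		.flags		= RPC_CLNT_CREATE_REUSEPORT |
				  RPC_CLNT_CREATE_NOPING,
	};
	struct rpc_clnt *clnt = rpc_create(&args);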
@ -1676,8 +1679,6 @@ call_reserveresult(struct rpc_task *task)
return;
}
printk(KERN_ERR "%s: status=%d, but no request slot, exiting\n",
__func__, status);
rpc_call_rpcerror(task, -EIO);
return;
}
@ -1686,11 +1687,8 @@ call_reserveresult(struct rpc_task *task)
* Even though there was an error, we may have acquired
* a request slot somehow. Make sure not to leak it.
*/
if (task->tk_rqstp) {
printk(KERN_ERR "%s: status=%d, request allocated anyway\n",
__func__, status);
if (task->tk_rqstp)
xprt_release(task);
}
switch (status) {
case -ENOMEM:
@ -1699,14 +1697,9 @@ call_reserveresult(struct rpc_task *task)
case -EAGAIN: /* woken up; retry */
task->tk_action = call_retry_reserve;
return;
case -EIO: /* probably a shutdown */
break;
default:
printk(KERN_ERR "%s: unrecognized error %d, exiting\n",
__func__, status);
break;
rpc_call_rpcerror(task, status);
}
rpc_call_rpcerror(task, status);
}
/*
@ -2906,7 +2899,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
struct rpc_xprt *xprt;
unsigned long connect_timeout;
unsigned long reconnect_timeout;
unsigned char resvport;
unsigned char resvport, reuseport;
int ret = 0;
rcu_read_lock();
@ -2918,6 +2911,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
return -EAGAIN;
}
resvport = xprt->resvport;
reuseport = xprt->reuseport;
connect_timeout = xprt->connect_timeout;
reconnect_timeout = xprt->max_reconnect_timeout;
rcu_read_unlock();
@ -2928,6 +2922,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
goto out_put_switch;
}
xprt->resvport = resvport;
xprt->reuseport = reuseport;
if (xprt->ops->set_connect_timeout != NULL)
xprt->ops->set_connect_timeout(xprt,
connect_timeout,


@ -260,7 +260,7 @@ static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const c
rpc_reset_waitqueue_priority(queue);
queue->qlen = 0;
queue->timer_list.expires = 0;
INIT_DEFERRABLE_WORK(&queue->timer_list.dwork, __rpc_queue_timer_fn);
INIT_DELAYED_WORK(&queue->timer_list.dwork, __rpc_queue_timer_fn);
INIT_LIST_HEAD(&queue->timer_list.list);
rpc_assign_waitqueue_name(queue, qname);
}
@ -824,6 +824,7 @@ rpc_reset_task_statistics(struct rpc_task *task)
*/
void rpc_exit_task(struct rpc_task *task)
{
trace_rpc_task_end(task, task->tk_action);
task->tk_action = NULL;
if (task->tk_ops->rpc_count_stats)
task->tk_ops->rpc_count_stats(task, task->tk_calldata);


@ -436,13 +436,12 @@ xdr_shrink_bufhead(struct xdr_buf *buf, size_t len)
}
/**
* xdr_shrink_pagelen
* xdr_shrink_pagelen - shrinks buf->pages by up to @len bytes
* @buf: xdr_buf
* @len: bytes to remove from buf->pages
*
* Shrinks XDR buffer's page array buf->pages by
* 'len' bytes. The extra data is not lost, but is instead
* moved into the tail.
* The extra data is not lost, but is instead moved into buf->tail.
* Returns the actual number of bytes moved.
*/
static unsigned int
xdr_shrink_pagelen(struct xdr_buf *buf, size_t len)
@ -455,8 +454,8 @@ xdr_shrink_pagelen(struct xdr_buf *buf, size_t len)
result = 0;
tail = buf->tail;
BUG_ON (len > pglen);
if (len > buf->page_len)
len = buf->page_len;
tailbuf_len = buf->buflen - buf->head->iov_len - buf->page_len;
/* Shift the tail first */


@ -205,20 +205,20 @@ int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) {
if (task == xprt->snd_task)
return 1;
goto out_locked;
goto out_sleep;
}
if (test_bit(XPRT_WRITE_SPACE, &xprt->state))
goto out_unlock;
xprt->snd_task = task;
out_locked:
trace_xprt_reserve_xprt(xprt, task);
return 1;
out_unlock:
xprt_clear_locked(xprt);
out_sleep:
dprintk("RPC: %5u failed to lock transport %p\n",
task->tk_pid, xprt);
task->tk_status = -EAGAIN;
if (RPC_IS_SOFT(task))
rpc_sleep_on_timeout(&xprt->sending, task, NULL,
@ -269,23 +269,22 @@ int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) {
if (task == xprt->snd_task)
return 1;
goto out_locked;
goto out_sleep;
}
if (req == NULL) {
xprt->snd_task = task;
return 1;
goto out_locked;
}
if (test_bit(XPRT_WRITE_SPACE, &xprt->state))
goto out_unlock;
if (!xprt_need_congestion_window_wait(xprt)) {
xprt->snd_task = task;
return 1;
goto out_locked;
}
out_unlock:
xprt_clear_locked(xprt);
out_sleep:
dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt);
task->tk_status = -EAGAIN;
if (RPC_IS_SOFT(task))
rpc_sleep_on_timeout(&xprt->sending, task, NULL,
@ -293,6 +292,9 @@ out_sleep:
else
rpc_sleep_on(&xprt->sending, task, NULL);
return 0;
out_locked:
trace_xprt_reserve_cong(xprt, task);
return 1;
}
EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong);
@ -357,6 +359,7 @@ void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
xprt_clear_locked(xprt);
__xprt_lock_write_next(xprt);
}
trace_xprt_release_xprt(xprt, task);
}
EXPORT_SYMBOL_GPL(xprt_release_xprt);
@ -374,6 +377,7 @@ void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
xprt_clear_locked(xprt);
__xprt_lock_write_next_cong(xprt);
}
trace_xprt_release_cong(xprt, task);
}
EXPORT_SYMBOL_GPL(xprt_release_xprt_cong);
@ -395,8 +399,7 @@ __xprt_get_cong(struct rpc_xprt *xprt, struct rpc_rqst *req)
{
if (req->rq_cong)
return 1;
dprintk("RPC: %5u xprt_cwnd_limited cong = %lu cwnd = %lu\n",
req->rq_task->tk_pid, xprt->cong, xprt->cwnd);
trace_xprt_get_cong(xprt, req->rq_task);
if (RPCXPRT_CONGESTED(xprt)) {
xprt_set_congestion_window_wait(xprt);
return 0;
@ -418,6 +421,7 @@ __xprt_put_cong(struct rpc_xprt *xprt, struct rpc_rqst *req)
req->rq_cong = 0;
xprt->cong -= RPC_CWNDSCALE;
xprt_test_and_clear_congestion_window_wait(xprt);
trace_xprt_put_cong(xprt, req->rq_task);
__xprt_lock_write_next_cong(xprt);
}
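Background for the two tracepoints added above, not part of the patch: each congestion-controlled request in flight holds RPC_CWNDSCALE units of xprt->cong, and the transport counts as congested once cong reaches cwnd. A sketch of the admission test, mirroring the existing RPCXPRT_CONGESTED() macro:
static inline bool xprt_cong_has_room(const struct rpc_xprt *xprt)
{
	return xprt->cong < xprt->cwnd;	/* !RPCXPRT_CONGESTED(xprt) */
}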


@ -79,7 +79,7 @@ static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
*p = xdr_zero;
if (rpcrdma_prepare_send_sges(r_xprt, req, RPCRDMA_HDRLEN_MIN,
&rqst->rq_snd_buf, rpcrdma_noch))
&rqst->rq_snd_buf, rpcrdma_noch_pullup))
return -EIO;
trace_xprtrdma_cb_reply(rqst);


@ -36,8 +36,8 @@
* connect worker from running concurrently.
*
* When the underlying transport disconnects, MRs that are in flight
* are flushed and are likely unusable. Thus all flushed MRs are
* destroyed. New MRs are created on demand.
* are flushed and are likely unusable. Thus all MRs are destroyed.
* New MRs are created on demand.
*/
#include <linux/sunrpc/rpc_rdma.h>
@ -88,8 +88,10 @@ void frwr_release_mr(struct rpcrdma_mr *mr)
kfree(mr);
}
static void frwr_mr_recycle(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
static void frwr_mr_recycle(struct rpcrdma_mr *mr)
{
struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
trace_xprtrdma_mr_recycle(mr);
if (mr->mr_dir != DMA_NONE) {
@ -107,32 +109,6 @@ static void frwr_mr_recycle(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
frwr_release_mr(mr);
}
/* MRs are dynamically allocated, so simply clean up and release the MR.
* A replacement MR will subsequently be allocated on demand.
*/
static void
frwr_mr_recycle_worker(struct work_struct *work)
{
struct rpcrdma_mr *mr = container_of(work, struct rpcrdma_mr,
mr_recycle);
frwr_mr_recycle(mr->mr_xprt, mr);
}
/* frwr_recycle - Discard MRs
* @req: request to reset
*
* Used after a reconnect. These MRs could be in flight, we can't
* tell. Safe thing to do is release them.
*/
void frwr_recycle(struct rpcrdma_req *req)
{
struct rpcrdma_mr *mr;
while ((mr = rpcrdma_mr_pop(&req->rl_registered)))
frwr_mr_recycle(mr->mr_xprt, mr);
}
/* frwr_reset - Place MRs back on the free list
* @req: request to reset
*
@ -166,9 +142,6 @@ int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
struct ib_mr *frmr;
int rc;
/* NB: ib_alloc_mr and device drivers typically allocate
* memory with GFP_KERNEL.
*/
frmr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth);
if (IS_ERR(frmr))
goto out_mr_err;
@ -180,7 +153,6 @@ int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
mr->frwr.fr_mr = frmr;
mr->mr_dir = DMA_NONE;
INIT_LIST_HEAD(&mr->mr_list);
INIT_WORK(&mr->mr_recycle, frwr_mr_recycle_worker);
init_completion(&mr->frwr.fr_linv_done);
sg_init_table(sg, depth);
@ -424,7 +396,7 @@ int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
struct ib_send_wr *post_wr;
struct rpcrdma_mr *mr;
post_wr = &req->rl_sendctx->sc_wr;
post_wr = &req->rl_wr;
list_for_each_entry(mr, &req->rl_registered, mr_list) {
struct rpcrdma_frwr *frwr;
@ -440,9 +412,6 @@ int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
post_wr = &frwr->fr_regwr.wr;
}
/* If ib_post_send fails, the next ->send_request for
* @req will queue these MRs for recovery.
*/
return ib_post_send(ia->ri_id->qp, post_wr, NULL);
}
@ -468,7 +437,7 @@ void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
static void __frwr_release_mr(struct ib_wc *wc, struct rpcrdma_mr *mr)
{
if (wc->status != IB_WC_SUCCESS)
rpcrdma_mr_recycle(mr);
frwr_mr_recycle(mr);
else
rpcrdma_mr_put(mr);
}
@ -570,7 +539,6 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
*/
bad_wr = NULL;
rc = ib_post_send(r_xprt->rx_ia.ri_id->qp, first, &bad_wr);
trace_xprtrdma_post_send(req, rc);
/* The final LOCAL_INV WR in the chain is supposed to
* do the wake. If it was never posted, the wake will
@ -583,6 +551,7 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
/* Recycle MRs in the LOCAL_INV chain that did not get posted.
*/
trace_xprtrdma_post_linv(req, rc);
while (bad_wr) {
frwr = container_of(bad_wr, struct rpcrdma_frwr,
fr_invwr);
@ -590,7 +559,7 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
bad_wr = bad_wr->next;
list_del_init(&mr->mr_list);
rpcrdma_mr_recycle(mr);
frwr_mr_recycle(mr);
}
}
@ -673,18 +642,18 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
*/
bad_wr = NULL;
rc = ib_post_send(r_xprt->rx_ia.ri_id->qp, first, &bad_wr);
trace_xprtrdma_post_send(req, rc);
if (!rc)
return;
/* Recycle MRs in the LOCAL_INV chain that did not get posted.
*/
trace_xprtrdma_post_linv(req, rc);
while (bad_wr) {
frwr = container_of(bad_wr, struct rpcrdma_frwr, fr_invwr);
mr = container_of(frwr, struct rpcrdma_mr, frwr);
bad_wr = bad_wr->next;
rpcrdma_mr_recycle(mr);
frwr_mr_recycle(mr);
}
/* The final LOCAL_INV WR in the chain is supposed to


@ -78,8 +78,6 @@ static unsigned int rpcrdma_max_call_header_size(unsigned int maxsegs)
size += rpcrdma_segment_maxsz * sizeof(__be32);
size += sizeof(__be32); /* list discriminator */
dprintk("RPC: %s: max call header size = %u\n",
__func__, size);
return size;
}
@ -100,8 +98,6 @@ static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs)
size += maxsegs * rpcrdma_segment_maxsz * sizeof(__be32);
size += sizeof(__be32); /* list discriminator */
dprintk("RPC: %s: max reply header size = %u\n",
__func__, size);
return size;
}
@ -363,8 +359,7 @@ static struct rpcrdma_mr_seg *rpcrdma_mr_prepare(struct rpcrdma_xprt *r_xprt,
out_getmr_err:
trace_xprtrdma_nomrs(req);
xprt_wait_for_buffer_space(&r_xprt->rx_xprt);
if (r_xprt->rx_ep.rep_connected != -ENODEV)
schedule_work(&r_xprt->rx_buf.rb_refresh_worker);
rpcrdma_mrs_refresh(r_xprt);
return ERR_PTR(-EAGAIN);
}
@ -393,7 +388,7 @@ static int rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt,
unsigned int pos;
int nsegs;
if (rtype == rpcrdma_noch)
if (rtype == rpcrdma_noch_pullup || rtype == rpcrdma_noch_mapped)
goto done;
pos = rqst->rq_snd_buf.head[0].iov_len;
@ -565,6 +560,7 @@ static void rpcrdma_sendctx_done(struct kref *kref)
*/
void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc)
{
struct rpcrdma_regbuf *rb = sc->sc_req->rl_sendbuf;
struct ib_sge *sge;
if (!sc->sc_unmap_count)
@ -576,7 +572,7 @@ void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc)
*/
for (sge = &sc->sc_sges[2]; sc->sc_unmap_count;
++sge, --sc->sc_unmap_count)
ib_dma_unmap_page(sc->sc_device, sge->addr, sge->length,
ib_dma_unmap_page(rdmab_device(rb), sge->addr, sge->length,
DMA_TO_DEVICE);
kref_put(&sc->sc_req->rl_kref, rpcrdma_sendctx_done);
@ -589,149 +585,228 @@ static bool rpcrdma_prepare_hdr_sge(struct rpcrdma_xprt *r_xprt,
{
struct rpcrdma_sendctx *sc = req->rl_sendctx;
struct rpcrdma_regbuf *rb = req->rl_rdmabuf;
struct ib_sge *sge = sc->sc_sges;
struct ib_sge *sge = &sc->sc_sges[req->rl_wr.num_sge++];
if (!rpcrdma_regbuf_dma_map(r_xprt, rb))
goto out_regbuf;
return false;
sge->addr = rdmab_addr(rb);
sge->length = len;
sge->lkey = rdmab_lkey(rb);
ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, sge->length,
DMA_TO_DEVICE);
sc->sc_wr.num_sge++;
return true;
}
/* The head iovec is straightforward, as it is usually already
* DMA-mapped. Sync the content that has changed.
*/
static bool rpcrdma_prepare_head_iov(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_req *req, unsigned int len)
{
struct rpcrdma_sendctx *sc = req->rl_sendctx;
struct ib_sge *sge = &sc->sc_sges[req->rl_wr.num_sge++];
struct rpcrdma_regbuf *rb = req->rl_sendbuf;
if (!rpcrdma_regbuf_dma_map(r_xprt, rb))
return false;
sge->addr = rdmab_addr(rb);
sge->length = len;
sge->lkey = rdmab_lkey(rb);
ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, sge->length,
DMA_TO_DEVICE);
return true;
}
/* If there is a page list present, DMA map and prepare an
* SGE for each page to be sent.
*/
static bool rpcrdma_prepare_pagelist(struct rpcrdma_req *req,
struct xdr_buf *xdr)
{
struct rpcrdma_sendctx *sc = req->rl_sendctx;
struct rpcrdma_regbuf *rb = req->rl_sendbuf;
unsigned int page_base, len, remaining;
struct page **ppages;
struct ib_sge *sge;
ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
page_base = offset_in_page(xdr->page_base);
remaining = xdr->page_len;
while (remaining) {
sge = &sc->sc_sges[req->rl_wr.num_sge++];
len = min_t(unsigned int, PAGE_SIZE - page_base, remaining);
sge->addr = ib_dma_map_page(rdmab_device(rb), *ppages,
page_base, len, DMA_TO_DEVICE);
if (ib_dma_mapping_error(rdmab_device(rb), sge->addr))
goto out_mapping_err;
sge->length = len;
sge->lkey = rdmab_lkey(rb);
sc->sc_unmap_count++;
ppages++;
remaining -= len;
page_base = 0;
}
return true;
out_regbuf:
pr_err("rpcrdma: failed to DMA map a Send buffer\n");
out_mapping_err:
trace_xprtrdma_dma_maperr(sge->addr);
return false;
}
/* Prepare the Send SGEs. The head and tail iovec, and each entry
* in the page list, gets its own SGE.
/* The tail iovec may include an XDR pad for the page list,
* as well as additional content, and may not reside in the
* same page as the head iovec.
*/
static bool rpcrdma_prepare_msg_sges(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_req *req,
static bool rpcrdma_prepare_tail_iov(struct rpcrdma_req *req,
struct xdr_buf *xdr,
enum rpcrdma_chunktype rtype)
unsigned int page_base, unsigned int len)
{
struct rpcrdma_sendctx *sc = req->rl_sendctx;
unsigned int sge_no, page_base, len, remaining;
struct ib_sge *sge = &sc->sc_sges[req->rl_wr.num_sge++];
struct rpcrdma_regbuf *rb = req->rl_sendbuf;
struct ib_sge *sge = sc->sc_sges;
struct page *page, **ppages;
struct page *page = virt_to_page(xdr->tail[0].iov_base);
/* The head iovec is straightforward, as it is already
* DMA-mapped. Sync the content that has changed.
*/
if (!rpcrdma_regbuf_dma_map(r_xprt, rb))
goto out_regbuf;
sc->sc_device = rdmab_device(rb);
sge_no = 1;
sge[sge_no].addr = rdmab_addr(rb);
sge[sge_no].length = xdr->head[0].iov_len;
sge[sge_no].lkey = rdmab_lkey(rb);
ib_dma_sync_single_for_device(rdmab_device(rb), sge[sge_no].addr,
sge[sge_no].length, DMA_TO_DEVICE);
sge->addr = ib_dma_map_page(rdmab_device(rb), page, page_base, len,
DMA_TO_DEVICE);
if (ib_dma_mapping_error(rdmab_device(rb), sge->addr))
goto out_mapping_err;
/* If there is a Read chunk, the page list is being handled
* via explicit RDMA, and thus is skipped here. However, the
* tail iovec may include an XDR pad for the page list, as
* well as additional content, and may not reside in the
* same page as the head iovec.
*/
if (rtype == rpcrdma_readch) {
len = xdr->tail[0].iov_len;
/* Do not include the tail if it is only an XDR pad */
if (len < 4)
goto out;
page = virt_to_page(xdr->tail[0].iov_base);
page_base = offset_in_page(xdr->tail[0].iov_base);
/* If the content in the page list is an odd length,
* xdr_write_pages() has added a pad at the beginning
* of the tail iovec. Force the tail's non-pad content
* to land at the next XDR position in the Send message.
*/
page_base += len & 3;
len -= len & 3;
goto map_tail;
}
/* If there is a page list present, temporarily DMA map
* and prepare an SGE for each page to be sent.
*/
if (xdr->page_len) {
ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
page_base = offset_in_page(xdr->page_base);
remaining = xdr->page_len;
while (remaining) {
sge_no++;
if (sge_no > RPCRDMA_MAX_SEND_SGES - 2)
goto out_mapping_overflow;
len = min_t(u32, PAGE_SIZE - page_base, remaining);
sge[sge_no].addr =
ib_dma_map_page(rdmab_device(rb), *ppages,
page_base, len, DMA_TO_DEVICE);
if (ib_dma_mapping_error(rdmab_device(rb),
sge[sge_no].addr))
goto out_mapping_err;
sge[sge_no].length = len;
sge[sge_no].lkey = rdmab_lkey(rb);
sc->sc_unmap_count++;
ppages++;
remaining -= len;
page_base = 0;
}
}
/* The tail iovec is not always constructed in the same
* page where the head iovec resides (see, for example,
* gss_wrap_req_priv). To neatly accommodate that case,
* DMA map it separately.
*/
if (xdr->tail[0].iov_len) {
page = virt_to_page(xdr->tail[0].iov_base);
page_base = offset_in_page(xdr->tail[0].iov_base);
len = xdr->tail[0].iov_len;
map_tail:
sge_no++;
sge[sge_no].addr =
ib_dma_map_page(rdmab_device(rb), page, page_base, len,
DMA_TO_DEVICE);
if (ib_dma_mapping_error(rdmab_device(rb), sge[sge_no].addr))
goto out_mapping_err;
sge[sge_no].length = len;
sge[sge_no].lkey = rdmab_lkey(rb);
sc->sc_unmap_count++;
}
out:
sc->sc_wr.num_sge += sge_no;
if (sc->sc_unmap_count)
kref_get(&req->rl_kref);
sge->length = len;
sge->lkey = rdmab_lkey(rb);
++sc->sc_unmap_count;
return true;
out_regbuf:
pr_err("rpcrdma: failed to DMA map a Send buffer\n");
return false;
out_mapping_overflow:
rpcrdma_sendctx_unmap(sc);
pr_err("rpcrdma: too many Send SGEs (%u)\n", sge_no);
return false;
out_mapping_err:
rpcrdma_sendctx_unmap(sc);
trace_xprtrdma_dma_maperr(sge[sge_no].addr);
trace_xprtrdma_dma_maperr(sge->addr);
return false;
}
/* Copy the tail to the end of the head buffer.
*/
static void rpcrdma_pullup_tail_iov(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_req *req,
struct xdr_buf *xdr)
{
unsigned char *dst;
dst = (unsigned char *)xdr->head[0].iov_base;
dst += xdr->head[0].iov_len + xdr->page_len;
memmove(dst, xdr->tail[0].iov_base, xdr->tail[0].iov_len);
r_xprt->rx_stats.pullup_copy_count += xdr->tail[0].iov_len;
}
/* Copy pagelist content into the head buffer.
*/
static void rpcrdma_pullup_pagelist(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_req *req,
struct xdr_buf *xdr)
{
unsigned int len, page_base, remaining;
struct page **ppages;
unsigned char *src, *dst;
dst = (unsigned char *)xdr->head[0].iov_base;
dst += xdr->head[0].iov_len;
ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
page_base = offset_in_page(xdr->page_base);
remaining = xdr->page_len;
while (remaining) {
src = page_address(*ppages);
src += page_base;
len = min_t(unsigned int, PAGE_SIZE - page_base, remaining);
memcpy(dst, src, len);
r_xprt->rx_stats.pullup_copy_count += len;
ppages++;
dst += len;
remaining -= len;
page_base = 0;
}
}
/* Copy the contents of @xdr into @rl_sendbuf and DMA sync it.
* When the head, pagelist, and tail are small, a pull-up copy
* is considerably less costly than DMA mapping the components
* of @xdr.
*
* Assumptions:
* - the caller has already verified that the total length
* of the RPC Call body will fit into @rl_sendbuf.
*/
static bool rpcrdma_prepare_noch_pullup(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_req *req,
struct xdr_buf *xdr)
{
if (unlikely(xdr->tail[0].iov_len))
rpcrdma_pullup_tail_iov(r_xprt, req, xdr);
if (unlikely(xdr->page_len))
rpcrdma_pullup_pagelist(r_xprt, req, xdr);
/* The whole RPC message resides in the head iovec now */
return rpcrdma_prepare_head_iov(r_xprt, req, xdr->len);
}
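A rough model of when this path is chosen (the actual check appears in rpcrdma_marshal_req() further down; the names below are illustrative): pull-up pays off only when the whole Call already fits in the pre-registered send buffer, so a single memcpy replaces several DMA map/unmap cycles.

enum example_strategy { EXAMPLE_PULLUP, EXAMPLE_MAPPED };

/* Illustrative sketch: pull up only when the marshalled Call fits
 * in the already-DMA-mapped send buffer.
 */
static enum example_strategy example_choose(unsigned int call_len,
                                            unsigned int sendbuf_len)
{
        return call_len < sendbuf_len ? EXAMPLE_PULLUP : EXAMPLE_MAPPED;
}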
static bool rpcrdma_prepare_noch_mapped(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_req *req,
struct xdr_buf *xdr)
{
struct kvec *tail = &xdr->tail[0];
if (!rpcrdma_prepare_head_iov(r_xprt, req, xdr->head[0].iov_len))
return false;
if (xdr->page_len)
if (!rpcrdma_prepare_pagelist(req, xdr))
return false;
if (tail->iov_len)
if (!rpcrdma_prepare_tail_iov(req, xdr,
offset_in_page(tail->iov_base),
tail->iov_len))
return false;
if (req->rl_sendctx->sc_unmap_count)
kref_get(&req->rl_kref);
return true;
}
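The kref_get() above keeps @req alive while Send SGEs remain DMA-mapped; the matching kref_put() happens once the Send completes and the SGEs are unmapped. A minimal sketch of that two-owner pattern, with hypothetical names:

#include <linux/kref.h>
#include <linux/slab.h>

struct example_req {
        struct kref ref;
};

static void example_req_free(struct kref *kref)
{
        kfree(container_of(kref, struct example_req, ref));
}

/* The RPC holds one reference; posting a Send with mapped SGEs takes
 * a second. The request is freed only after both owners drop theirs,
 * in whichever order the reply and the Send completion arrive.
 */
static void example_send_posted(struct example_req *req)
{
        kref_get(&req->ref);
}

static void example_send_completed(struct example_req *req)
{
        kref_put(&req->ref, example_req_free);
}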
static bool rpcrdma_prepare_readch(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_req *req,
struct xdr_buf *xdr)
{
if (!rpcrdma_prepare_head_iov(r_xprt, req, xdr->head[0].iov_len))
return false;
/* If there is a Read chunk, the page list is being handled
* via explicit RDMA, and thus is skipped here.
*/
/* Do not include the tail if it is only an XDR pad */
if (xdr->tail[0].iov_len > 3) {
unsigned int page_base, len;
/* If the content in the page list is an odd length,
* xdr_write_pages() adds a pad at the beginning of
* the tail iovec. Force the tail's non-pad content to
* land at the next XDR position in the Send message.
*/
page_base = offset_in_page(xdr->tail[0].iov_base);
len = xdr->tail[0].iov_len;
page_base += len & 3;
len -= len & 3;
if (!rpcrdma_prepare_tail_iov(req, xdr, page_base, len))
return false;
kref_get(&req->rl_kref);
}
return true;
}
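XDR encodes data in 4-byte units, so a worked example of the pad arithmetic may help: if the page list carried 4091 bytes of payload, xdr_write_pages() placed a 1-byte pad at the front of the tail. Everything after that pad is whole XDR words, so tail_len & 3 recovers the pad length no matter how much real content follows (a sketch, illustrative names):

#include <assert.h>

/* Length of the XDR pad at the front of the tail: the tail is
 * pad + XDR-aligned content, so only the pad contributes to the
 * low two bits of its length.
 */
static unsigned int example_xdr_pad_len(unsigned int tail_len)
{
        return tail_len & 3;
}

int main(void)
{
        /* 4091-byte page list -> 1 pad byte, then 8 bytes of content */
        assert(example_xdr_pad_len(1 + 8) == 1);
        /* XDR-aligned page list -> no pad */
        assert(example_xdr_pad_len(8) == 0);
        return 0;
}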
/**
* rpcrdma_prepare_send_sges - Construct SGEs for a Send WR
* @r_xprt: controlling transport
@ -742,31 +817,53 @@ out_mapping_err:
*
* Returns 0 on success; otherwise a negative errno is returned.
*/
inline int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_req *req, u32 hdrlen,
struct xdr_buf *xdr,
enum rpcrdma_chunktype rtype)
{
int ret;
ret = -EAGAIN;
req->rl_sendctx = rpcrdma_sendctx_get_locked(r_xprt);
if (!req->rl_sendctx)
goto out_nosc;
req->rl_sendctx->sc_unmap_count = 0;
req->rl_sendctx->sc_req = req;
kref_init(&req->rl_kref);
req->rl_wr.wr_cqe = &req->rl_sendctx->sc_cqe;
req->rl_wr.sg_list = req->rl_sendctx->sc_sges;
req->rl_wr.num_sge = 0;
req->rl_wr.opcode = IB_WR_SEND;
ret = -EIO;
if (!rpcrdma_prepare_hdr_sge(r_xprt, req, hdrlen))
goto out_unmap;
switch (rtype) {
case rpcrdma_noch_pullup:
if (!rpcrdma_prepare_noch_pullup(r_xprt, req, xdr))
goto out_unmap;
break;
case rpcrdma_noch_mapped:
if (!rpcrdma_prepare_noch_mapped(r_xprt, req, xdr))
goto out_unmap;
break;
case rpcrdma_readch:
if (!rpcrdma_prepare_readch(r_xprt, req, xdr))
goto out_unmap;
break;
case rpcrdma_areadch:
break;
default:
goto out_unmap;
}
return 0;
out_unmap:
rpcrdma_sendctx_unmap(req->rl_sendctx);
out_nosc:
trace_xprtrdma_prepsend_failed(&req->rl_slot, ret);
return ret;
}
@ -796,6 +893,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
struct xdr_stream *xdr = &req->rl_stream;
enum rpcrdma_chunktype rtype, wtype;
struct xdr_buf *buf = &rqst->rq_snd_buf;
bool ddp_allowed;
__be32 *p;
int ret;
@ -853,8 +951,9 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
*/
if (rpcrdma_args_inline(r_xprt, rqst)) {
*p++ = rdma_msg;
rtype = buf->len < rdmab_length(req->rl_sendbuf) ?
rpcrdma_noch_pullup : rpcrdma_noch_mapped;
} else if (ddp_allowed && buf->flags & XDRBUF_WRITE) {
*p++ = rdma_msg;
rtype = rpcrdma_readch;
} else {
@ -863,12 +962,6 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
rtype = rpcrdma_areadch;
}
/* If this is a retransmit, discard previously registered
* chunks. Very likely the connection has been replaced,
* so these registrations are invalid and unusable.
*/
frwr_recycle(req);
/* This implementation supports the following combinations
* of chunk lists in one RPC-over-RDMA Call message:
*
@ -902,7 +995,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
goto out_err;
ret = rpcrdma_prepare_send_sges(r_xprt, req, req->rl_hdrbuf.len,
buf, rtype);
if (ret)
goto out_err;
@ -916,6 +1009,40 @@ out_err:
return ret;
}
static void __rpcrdma_update_cwnd_locked(struct rpc_xprt *xprt,
struct rpcrdma_buffer *buf,
u32 grant)
{
buf->rb_credits = grant;
xprt->cwnd = grant << RPC_CWNDSHIFT;
}
static void rpcrdma_update_cwnd(struct rpcrdma_xprt *r_xprt, u32 grant)
{
struct rpc_xprt *xprt = &r_xprt->rx_xprt;
spin_lock(&xprt->transport_lock);
__rpcrdma_update_cwnd_locked(xprt, &r_xprt->rx_buf, grant);
spin_unlock(&xprt->transport_lock);
}
/**
* rpcrdma_reset_cwnd - Reset the xprt's congestion window
* @r_xprt: controlling transport instance
*
* Prepare @r_xprt for the next connection by reinitializing
* its credit grant to one (see RFC 8166, Section 3.3.3).
*/
void rpcrdma_reset_cwnd(struct rpcrdma_xprt *r_xprt)
{
struct rpc_xprt *xprt = &r_xprt->rx_xprt;
spin_lock(&xprt->transport_lock);
xprt->cong = 0;
__rpcrdma_update_cwnd_locked(xprt, &r_xprt->rx_buf, 1);
spin_unlock(&xprt->transport_lock);
}
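For context: RPC_CWNDSHIFT is 8 in include/linux/sunrpc/xprt.h, so a credit grant of N yields a congestion window of N * 256, and the reset above leaves room for exactly one in-flight request until the server grants more. A small stand-alone model (illustrative, not kernel code):

#include <stdio.h>

#define RPC_CWNDSHIFT 8u   /* matches include/linux/sunrpc/xprt.h */

int main(void)
{
        unsigned int grant;

        for (grant = 1; grant <= 128; grant <<= 1)
                printf("credit grant %3u -> cwnd %5u\n",
                       grant, grant << RPC_CWNDSHIFT);
        return 0;
}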
/**
* rpcrdma_inline_fixup - Scatter inline received data into rqst's iovecs
* @rqst: controlling RPC request
@ -955,7 +1082,6 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
curlen = rqst->rq_rcv_buf.head[0].iov_len;
if (curlen > copy_len)
curlen = copy_len;
trace_xprtrdma_fixup(rqst, copy_len, curlen);
srcp += curlen;
copy_len -= curlen;
@ -975,8 +1101,6 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
if (curlen > pagelist_len)
curlen = pagelist_len;
trace_xprtrdma_fixup_pg(rqst, i, srcp,
copy_len, curlen);
destp = kmap_atomic(ppages[i]);
memcpy(destp + page_base, srcp, curlen);
flush_dcache_page(ppages[i]);
@ -1008,6 +1132,8 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
rqst->rq_private_buf.tail[0].iov_base = srcp;
}
if (fixup_copy_count)
trace_xprtrdma_fixup(rqst, fixup_copy_count);
return fixup_copy_count;
}
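A flattened, user-space model of the scatter performed above, with one contiguous region standing in for the receive page list (all names illustrative); the return value mirrors fixup_copy_count, counting only bytes that land outside the head:

#include <string.h>

struct example_buf {
        unsigned char *head;  unsigned int head_len;
        unsigned char *pages; unsigned int page_len;
        unsigned char *tail;  unsigned int tail_len;
};

static unsigned int example_min(unsigned int a, unsigned int b)
{
        return a < b ? a : b;
}

static unsigned int example_fixup(struct example_buf *buf,
                                  const unsigned char *src, unsigned int len)
{
        unsigned int n, copied = 0;

        n = example_min(len, buf->head_len);
        memcpy(buf->head, src, n);
        src += n;
        len -= n;

        n = example_min(len, buf->page_len);
        memcpy(buf->pages, src, n);
        src += n;
        len -= n;
        copied += n;

        n = example_min(len, buf->tail_len);
        memcpy(buf->tail, src, n);
        copied += n;

        return copied;
}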
@ -1356,12 +1482,9 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
credits = 1; /* don't deadlock */
else if (credits > buf->rb_max_requests)
credits = buf->rb_max_requests;
if (buf->rb_credits != credits)
rpcrdma_update_cwnd(r_xprt, credits);
rpcrdma_post_recvs(r_xprt, false);
req = rpcr_to_rdmar(rqst);
if (req->rl_reply) {

diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c

@ -243,16 +243,13 @@ xprt_rdma_connect_worker(struct work_struct *work)
rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia);
xprt_clear_connecting(xprt);
if (r_xprt->rx_ep.rep_connected > 0) {
xprt->stat.connect_count++;
xprt->stat.connect_time += (long)jiffies -
xprt->stat.connect_start;
xprt_set_connected(xprt);
rc = -EAGAIN;
}
xprt_wake_pending_tasks(xprt, rc);
}
/**
@ -425,12 +422,6 @@ void xprt_rdma_close(struct rpc_xprt *xprt)
return;
rpcrdma_ep_disconnect(ep, ia);
/* Prepare @xprt for the next connection by reinitializing
* its credit grant to one (see RFC 8166, Section 3.3.3).
*/
r_xprt->rx_buf.rb_credits = 1;
xprt->cwnd = RPC_CWNDSHIFT;
out:
xprt->reestablish_timeout = 0;
++xprt->connect_cookie;
@ -450,12 +441,6 @@ xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port)
struct sockaddr *sap = (struct sockaddr *)&xprt->addr;
char buf[8];
dprintk("RPC: %s: setting port for xprt %p (%s:%s) to %u\n",
__func__, xprt,
xprt->address_strings[RPC_DISPLAY_ADDR],
xprt->address_strings[RPC_DISPLAY_PORT],
port);
rpc_set_port(sap, port);
kfree(xprt->address_strings[RPC_DISPLAY_PORT]);
@ -465,6 +450,9 @@ xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port)
kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]);
snprintf(buf, sizeof(buf), "%4hx", port);
xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL);
trace_xprtrdma_op_setport(container_of(xprt, struct rpcrdma_xprt,
rx_xprt));
}
/**
@ -536,13 +524,12 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
unsigned long delay;
delay = 0;
if (r_xprt->rx_ep.rep_connected != 0) {
delay = xprt_reconnect_delay(xprt);
xprt_reconnect_backoff(xprt, RPCRDMA_INIT_REEST_TO);
}
trace_xprtrdma_op_connect(r_xprt, delay);
queue_delayed_work(xprtiod_workqueue, &r_xprt->rx_connect_worker,
delay);
}

diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c

@ -74,17 +74,17 @@
/*
* internal functions
*/
static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_sendctx *sc);
static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_reps_destroy(struct rpcrdma_buffer *buf);
static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_mr_free(struct rpcrdma_mr *mr);
static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt);
static struct rpcrdma_regbuf *
rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction,
gfp_t flags);
static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb);
static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb);
static void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp);
/* Wait for outstanding transport work to finish. ib_drain_qp
* handles the drains in the wrong order for us, so open code
@ -125,7 +125,7 @@ rpcrdma_qp_event_handler(struct ib_event *event, void *context)
/**
* rpcrdma_wc_send - Invoked by RDMA provider for each polled Send WC
* @cq: completion queue
* @wc: completed WR
*
*/
@ -138,7 +138,7 @@ rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
/* WARNING: Only wr_cqe and status are reliable at this point */
trace_xprtrdma_wc_send(sc, wc);
rpcrdma_sendctx_put_locked((struct rpcrdma_xprt *)cq->cq_context, sc);
}
/**
@ -170,7 +170,6 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
rdmab_addr(rep->rr_rdmabuf),
wc->byte_len, DMA_FROM_DEVICE);
rpcrdma_post_recvs(r_xprt, false);
rpcrdma_reply_handler(rep);
return;
@ -178,11 +177,11 @@ out_flushed:
rpcrdma_recv_buffer_put(rep);
}
static void rpcrdma_update_cm_private(struct rpcrdma_xprt *r_xprt,
struct rdma_conn_param *param)
{
const struct rpcrdma_connect_private *pmsg = param->private_data;
struct rpcrdma_ep *ep = &r_xprt->rx_ep;
unsigned int rsize, wsize;
/* Default settings for RPC-over-RDMA Version One */
@ -198,13 +197,11 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size);
}
if (rsize < ep->rep_inline_recv)
ep->rep_inline_recv = rsize;
if (wsize < ep->rep_inline_send)
ep->rep_inline_send = wsize;
rpcrdma_set_max_header_sizes(r_xprt);
}
@ -258,7 +255,8 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
case RDMA_CM_EVENT_ESTABLISHED:
++xprt->connect_cookie;
ep->rep_connected = 1;
rpcrdma_update_cm_private(r_xprt, &event->param.conn);
trace_xprtrdma_inline_thresh(r_xprt);
wake_up_all(&ep->rep_connect_wait);
break;
case RDMA_CM_EVENT_CONNECT_ERROR:
@ -298,8 +296,6 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia)
struct rdma_cm_id *id;
int rc;
trace_xprtrdma_conn_start(xprt);
init_completion(&ia->ri_done);
init_completion(&ia->ri_remove_done);
@ -315,10 +311,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia)
if (rc)
goto out;
rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
if (rc < 0)
goto out;
rc = ia->ri_async_rc;
if (rc)
@ -329,10 +323,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia)
if (rc)
goto out;
rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
if (rc < 0)
goto out;
rc = ia->ri_async_rc;
if (rc)
goto out;
@ -409,8 +401,6 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_req *req;
cancel_work_sync(&buf->rb_refresh_worker);
/* This is similar to rpcrdma_ep_destroy, but:
* - Don't cancel the connect worker.
* - Don't call rpcrdma_ep_disconnect, which waits
@ -437,7 +427,7 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
rpcrdma_regbuf_dma_unmap(req->rl_sendbuf);
rpcrdma_regbuf_dma_unmap(req->rl_recvbuf);
}
rpcrdma_mrs_destroy(r_xprt);
ib_dealloc_pd(ia->ri_pd);
ia->ri_pd = NULL;
@ -522,7 +512,7 @@ int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
init_waitqueue_head(&ep->rep_connect_wait);
ep->rep_receive_count = 0;
sendcq = ib_alloc_cq_any(ia->ri_id->device, r_xprt,
ep->rep_attr.cap.max_send_wr + 1,
IB_POLL_WORKQUEUE);
if (IS_ERR(sendcq)) {
@ -630,8 +620,6 @@ static int rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
pr_err("rpcrdma: rdma_create_qp returned %d\n", err);
goto out3;
}
rpcrdma_mrs_create(r_xprt);
return 0;
out3:
@ -649,8 +637,6 @@ static int rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt,
struct rdma_cm_id *id, *old;
int err, rc;
trace_xprtrdma_reconnect(r_xprt);
rpcrdma_ep_disconnect(&r_xprt->rx_ep, ia);
rc = -EHOSTUNREACH;
@ -705,7 +691,6 @@ retry:
memcpy(&qp_init_attr, &ep->rep_attr, sizeof(qp_init_attr));
switch (ep->rep_connected) {
case 0:
dprintk("RPC: %s: connecting...\n", __func__);
rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &qp_init_attr);
if (rc) {
rc = -ENETUNREACH;
@ -726,6 +711,7 @@ retry:
ep->rep_connected = 0;
xprt_clear_connected(xprt);
rpcrdma_reset_cwnd(r_xprt);
rpcrdma_post_recvs(r_xprt, true);
rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
@ -742,13 +728,14 @@ retry:
goto out;
}
dprintk("RPC: %s: connected\n", __func__);
rpcrdma_mrs_create(r_xprt);
out:
if (rc)
ep->rep_connected = rc;
out_noupdate:
trace_xprtrdma_connect(r_xprt, rc);
return rc;
}
@ -757,11 +744,8 @@ out_noupdate:
* @ep: endpoint to disconnect
* @ia: associated interface adapter
*
* This is separate from destroy to facilitate the ability
* to reconnect without recreating the endpoint.
*
* This call is not reentrant, and must not be made in parallel
* on the same endpoint.
* Caller serializes. Either the transport send lock is held,
* or we're being called to destroy the transport.
*/
void
rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
@ -780,6 +764,8 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
trace_xprtrdma_disconnect(r_xprt, rc);
rpcrdma_xprt_drain(r_xprt);
rpcrdma_reqs_reset(r_xprt);
rpcrdma_mrs_destroy(r_xprt);
}
/* Fixed-size circular FIFO queue. This implementation is wait-free and
@ -817,9 +803,6 @@ static struct rpcrdma_sendctx *rpcrdma_sendctx_create(struct rpcrdma_ia *ia)
if (!sc)
return NULL;
sc->sc_wr.wr_cqe = &sc->sc_cqe;
sc->sc_wr.sg_list = sc->sc_sges;
sc->sc_wr.opcode = IB_WR_SEND;
sc->sc_cqe.done = rpcrdma_wc_send;
return sc;
}
@ -847,7 +830,6 @@ static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt)
if (!sc)
return -ENOMEM;
sc->sc_xprt = r_xprt;
buf->rb_sc_ctxs[i] = sc;
}
@ -910,6 +892,7 @@ out_emptyq:
/**
* rpcrdma_sendctx_put_locked - Release a send context
* @r_xprt: controlling transport instance
* @sc: send context to release
*
* Usage: Called from Send completion to return a sendctx
@ -917,10 +900,10 @@ out_emptyq:
*
* The caller serializes calls to this function (per transport).
*/
static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_sendctx *sc)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
unsigned long next_tail;
/* Unmap SGEs of previously completed but unsignaled
@ -938,7 +921,7 @@ rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
/* Paired with READ_ONCE */
smp_store_release(&buf->rb_sc_tail, next_tail);
xprt_write_space(&r_xprt->rx_xprt);
}
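The smp_store_release() above publishes the new tail so the producer side (rpcrdma_sendctx_get_locked(), not shown in this hunk) observes fully-written slots when it reads the tail. A C11 sketch of the same pairing, using an acquire load as a conservative stand-in for the kernel's READ_ONCE() (names illustrative):

#include <stdatomic.h>
#include <stdbool.h>

struct example_ring {
        _Atomic unsigned long head;  /* producer: next slot to hand out */
        _Atomic unsigned long tail;  /* consumer: oldest slot in flight */
};

/* Consumer (completion) side: everything written to the freed slots
 * must be visible before the new tail value is.
 */
static void example_release_slots(struct example_ring *r,
                                  unsigned long next_tail)
{
        atomic_store_explicit(&r->tail, next_tail, memory_order_release);
}

/* Producer side: a free slot exists while head has not lapped tail. */
static bool example_slot_available(struct example_ring *r,
                                   unsigned long ring_size)
{
        unsigned long head = atomic_load_explicit(&r->head,
                                                  memory_order_relaxed);
        unsigned long tail = atomic_load_explicit(&r->tail,
                                                  memory_order_acquire);

        return head - tail < ring_size;
}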
static void
@ -965,7 +948,7 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
mr->mr_xprt = r_xprt;
spin_lock(&buf->rb_lock);
rpcrdma_mr_push(mr, &buf->rb_mrs);
list_add(&mr->mr_all, &buf->rb_all_mrs);
spin_unlock(&buf->rb_lock);
}
@ -986,6 +969,28 @@ rpcrdma_mr_refresh_worker(struct work_struct *work)
xprt_write_space(&r_xprt->rx_xprt);
}
/**
* rpcrdma_mrs_refresh - Wake the MR refresh worker
* @r_xprt: controlling transport instance
*
*/
void rpcrdma_mrs_refresh(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_ep *ep = &r_xprt->rx_ep;
/* If there is no underlying device, it's no use to
* wake the refresh worker.
*/
if (ep->rep_connected != -ENODEV) {
/* The work is scheduled on a WQ_MEM_RECLAIM
* workqueue in order to prevent MR allocation
* from recursing into NFS during direct reclaim.
*/
queue_work(xprtiod_workqueue, &buf->rb_refresh_worker);
}
}
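The forward-progress guarantee comes from the workqueue's rescuer thread: a WQ_MEM_RECLAIM queue pre-allocates one, so queued work can still run when the allocator cannot spawn new worker threads. A sketch of creating such a queue (xprtiod_workqueue itself is set up elsewhere in net/sunrpc; this is illustrative):

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/workqueue.h>

static struct workqueue_struct *example_wq;

static int __init example_wq_init(void)
{
        /* WQ_MEM_RECLAIM guarantees a rescuer thread, so work queued
         * here can make progress even during direct reclaim.
         */
        example_wq = alloc_workqueue("example_iod",
                                     WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
        return example_wq ? 0 : -ENOMEM;
}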
/**
* rpcrdma_req_create - Allocate an rpcrdma_req object
* @r_xprt: controlling r_xprt
@ -1042,6 +1047,26 @@ out1:
return NULL;
}
/**
* rpcrdma_reqs_reset - Reset all reqs owned by a transport
* @r_xprt: controlling transport instance
*
* ASSUMPTION: the rb_allreqs list is stable for the duration,
* and thus can be walked without holding rb_lock. E.g., the
* caller is holding the transport send lock to exclude
* device removal or disconnection.
*/
static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_req *req;
list_for_each_entry(req, &buf->rb_allreqs, rl_all) {
/* Credits are valid only for one connection */
req->rl_slot.rq_cong = 0;
}
}
static struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
bool temp)
{
@ -1125,8 +1150,6 @@ int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
INIT_LIST_HEAD(&buf->rb_all_mrs);
INIT_WORK(&buf->rb_refresh_worker, rpcrdma_mr_refresh_worker);
rpcrdma_mrs_create(r_xprt);
INIT_LIST_HEAD(&buf->rb_send_bufs);
INIT_LIST_HEAD(&buf->rb_allreqs);
@ -1134,14 +1157,13 @@ int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
for (i = 0; i < buf->rb_max_requests; i++) {
struct rpcrdma_req *req;
req = rpcrdma_req_create(r_xprt, RPCRDMA_V1_DEF_INLINE_SIZE * 2,
GFP_KERNEL);
if (!req)
goto out;
list_add(&req->rl_list, &buf->rb_send_bufs);
}
buf->rb_credits = 1;
init_llist_head(&buf->rb_free_reps);
rc = rpcrdma_sendctxs_create(r_xprt);
@ -1158,15 +1180,24 @@ out:
* rpcrdma_req_destroy - Destroy an rpcrdma_req object
* @req: unused object to be destroyed
*
* Relies on the caller holding the transport send lock to protect
* removing req->rl_all from buf->rb_allreqs safely.
*/
void rpcrdma_req_destroy(struct rpcrdma_req *req)
{
struct rpcrdma_mr *mr;
list_del(&req->rl_all);
while ((mr = rpcrdma_mr_pop(&req->rl_free_mrs))) {
struct rpcrdma_buffer *buf = &mr->mr_xprt->rx_buf;
spin_lock(&buf->rb_lock);
list_del(&mr->mr_all);
spin_unlock(&buf->rb_lock);
frwr_release_mr(mr);
}
rpcrdma_regbuf_free(req->rl_recvbuf);
rpcrdma_regbuf_free(req->rl_sendbuf);
@ -1174,28 +1205,33 @@ void rpcrdma_req_destroy(struct rpcrdma_req *req)
kfree(req);
}
/**
* rpcrdma_mrs_destroy - Release all of a transport's MRs
* @r_xprt: controlling transport instance
*
* Relies on caller holding the transport send lock to protect
* removing mr->mr_list from req->rl_free_mrs safely.
*/
static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_mr *mr;
cancel_work_sync(&buf->rb_refresh_worker);
spin_lock(&buf->rb_lock);
while ((mr = list_first_entry_or_null(&buf->rb_all_mrs,
struct rpcrdma_mr,
mr_all)) != NULL) {
list_del(&mr->mr_list);
list_del(&mr->mr_all);
spin_unlock(&buf->rb_lock);
frwr_release_mr(mr);
spin_lock(&buf->rb_lock);
}
spin_unlock(&buf->rb_lock);
r_xprt->rx_stats.mrs_allocated = 0;
}
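The loop above uses a common drain idiom: hold rb_lock only long enough to detach one entry, drop it across frwr_release_mr() (which can sleep), then re-acquire and re-check the list head. A generic sketch of the idiom with hypothetical types:

#include <linux/list.h>
#include <linux/spinlock.h>

struct example_item {
        struct list_head link;
};

static void example_release(struct example_item *item);  /* may sleep */

static void example_drain(spinlock_t *lock, struct list_head *items)
{
        struct example_item *item;

        spin_lock(lock);
        while ((item = list_first_entry_or_null(items, struct example_item,
                                                link)) != NULL) {
                list_del(&item->link);
                spin_unlock(lock);

                example_release(item);   /* blocking call, lock dropped */

                spin_lock(lock);
        }
        spin_unlock(lock);
}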
/**
@ -1209,8 +1245,6 @@ rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf)
void
rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
{
cancel_work_sync(&buf->rb_refresh_worker);
rpcrdma_sendctxs_destroy(buf);
rpcrdma_reps_destroy(buf);
@ -1222,8 +1256,6 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
list_del(&req->rl_list);
rpcrdma_req_destroy(req);
}
rpcrdma_mrs_destroy(buf);
}
/**
@ -1264,17 +1296,6 @@ void rpcrdma_mr_put(struct rpcrdma_mr *mr)
rpcrdma_mr_push(mr, &mr->mr_req->rl_free_mrs);
}
static void rpcrdma_mr_free(struct rpcrdma_mr *mr)
{
struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
mr->mr_req = NULL;
spin_lock(&buf->rb_lock);
rpcrdma_mr_push(mr, &buf->rb_mrs);
spin_unlock(&buf->rb_lock);
}
/**
* rpcrdma_buffer_get - Get a request buffer
* @buffers: Buffer pool from which to obtain a buffer
@ -1437,7 +1458,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
struct rpcrdma_ep *ep,
struct rpcrdma_req *req)
{
struct ib_send_wr *send_wr = &req->rl_wr;
int rc;
if (!ep->rep_send_count || kref_read(&req->rl_kref) > 1) {
@ -1455,8 +1476,13 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
return 0;
}
/**
* rpcrdma_post_recvs - Refill the Receive Queue
* @r_xprt: controlling transport instance
* @temp: mark Receive buffers to be deleted after use
*
*/
void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_ep *ep = &r_xprt->rx_ep;

diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h

@ -218,12 +218,8 @@ enum {
/* struct rpcrdma_sendctx - DMA mapped SGEs to unmap after Send completes
*/
struct rpcrdma_req;
struct rpcrdma_xprt;
struct rpcrdma_sendctx {
struct ib_send_wr sc_wr;
struct ib_cqe sc_cqe;
struct ib_device *sc_device;
struct rpcrdma_xprt *sc_xprt;
struct rpcrdma_req *sc_req;
unsigned int sc_unmap_count;
struct ib_sge sc_sges[];
@ -257,7 +253,6 @@ struct rpcrdma_mr {
u32 mr_handle;
u32 mr_length;
u64 mr_offset;
struct work_struct mr_recycle;
struct list_head mr_all;
};
@ -318,6 +313,7 @@ struct rpcrdma_req {
struct rpcrdma_rep *rl_reply;
struct xdr_stream rl_stream;
struct xdr_buf rl_hdrbuf;
struct ib_send_wr rl_wr;
struct rpcrdma_sendctx *rl_sendctx;
struct rpcrdma_regbuf *rl_rdmabuf; /* xprt header */
struct rpcrdma_regbuf *rl_sendbuf; /* rq_snd_buf */
@ -474,6 +470,7 @@ void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);
int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
struct rpcrdma_req *);
void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp);
/*
* Buffer calls - xprtrdma/verbs.c
@ -487,12 +484,7 @@ struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_xprt *r_xprt);
struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt);
void rpcrdma_mr_put(struct rpcrdma_mr *mr);
static inline void
rpcrdma_mr_recycle(struct rpcrdma_mr *mr)
{
schedule_work(&mr->mr_recycle);
}
void rpcrdma_mrs_refresh(struct rpcrdma_xprt *r_xprt);
struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers,
@ -542,7 +534,6 @@ rpcrdma_data_dir(bool writing)
/* Memory registration calls xprtrdma/frwr_ops.c
*/
bool frwr_is_supported(struct ib_device *device);
void frwr_recycle(struct rpcrdma_req *req);
void frwr_reset(struct rpcrdma_req *req);
int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep);
int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr);
@ -563,6 +554,8 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);
enum rpcrdma_chunktype {
rpcrdma_noch = 0,
rpcrdma_noch_pullup,
rpcrdma_noch_mapped,
rpcrdma_readch,
rpcrdma_areadch,
rpcrdma_writech,
@ -576,6 +569,7 @@ int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc);
int rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst);
void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *);
void rpcrdma_reset_cwnd(struct rpcrdma_xprt *r_xprt);
void rpcrdma_complete_rqst(struct rpcrdma_rep *rep);
void rpcrdma_reply_handler(struct rpcrdma_rep *rep);

diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c

@ -1752,7 +1752,7 @@ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port)
static void xs_set_srcport(struct sock_xprt *transport, struct socket *sock)
{
if (transport->srcport == 0 && transport->xprt.reuseport)
transport->srcport = xs_sock_getport(sock);
}