NFS client updates for Linux 4.13

Stable bugfixes:
 - Fix -EACCESS on commit to DS handling
 - Fix initialization of nfs_page_array->npages
 - Only invalidate dentries that are actually invalid
 
 Features:
 - Enable NFSoRDMA transparent state migration
 - Add support for lookup-by-filehandle
 - Add support for nfs re-exporting
 
 Other bugfixes and cleanups:
 - Christoph cleaned up the way we declare NFS operations
 - Clean up various internal structures
 - Various cleanups to commits
 - Various improvements to error handling
 - Set the dt_type of . and .. entries in NFS v4
 - Make slot allocation more reliable
 - Fix fscache stat printing
 - Fix uninitialized variable warnings
 - Fix potential list overrun in nfs_atomic_open()
 - Fix a race in NFSoRDMA RPC reply handler
 - Fix return size for nfs42_proc_copy()
 - Fix against MAC forgery timing attacks
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEnZ5MQTpR7cLU7KEp18tUv7ClQOsFAlln4jEACgkQ18tUv7Cl
 QOv2ZxAAwbQN9Dtx4rOZmPe0Xszua23sNN0ja891PodkCjIiZrRelZhLIBAf1rfP
 uSR+jTD8EsBHGt3bzTXg2DHz+o8cGDZuH+uuZX+wRWJPQcKA2pC7zElqnse8nmn5
 4Z1UUdzf42vE4NZ/G1ucqpEiAmOqGJ3s7pCRLLXPvOSSQXqOhiomNDAcGxX05FIv
 Ly4Kr6RIfg/O4oNOZBuuL/tZHodeyOj1vbyjt/4bDQ5MEXlUQfcjJZEsz/2EcNh6
 rAgbquxr1pGCD072pPBwYNH2vLGbgNN41KDDMGI0clp+8p6EhV6BOlgcEoGtZM86
 c0yro2oBOB2vPCv9nGr6JgTOHPKG6ksJ7vWVXrtQEjBGP82AbFfAawLgqZ6Ae8dP
 Sqpx55j4xdm4nyNglCuhq5PlPAogARq/eibR+RbY973Lhzr5bZb3XqlairCkNNEv
 4RbTlxbWjhgrKJ56jVf+KpUDJAVG5viKMD7YDx/bOfLtvPwALbozD7ONrunz5v43
 PgQEvWvVtnQAKp27pqHemTsLFhU6M6eGUEctRnAfB/0ogWZh1X8QXgulpDlqG3kb
 g12kr5hfA0pSfcB0aGXVzJNnHKfW3IY3WBWtxq4xaMY22YkHtuB+78+9/yk3jCAi
 dvimjT2Ko9fE9MnltJ/hC5BU+T+xUxg+1vfwWnKMvMH8SIqjyu4=
 =OpLj
 -----END PGP SIGNATURE-----

Merge tag 'nfs-for-4.13-1' of git://git.linux-nfs.org/projects/anna/linux-nfs

Pull NFS client updates from Anna Schumaker:
 "Stable bugfixes:
   - Fix -EACCESS on commit to DS handling
   - Fix initialization of nfs_page_array->npages
   - Only invalidate dentries that are actually invalid

  Features:
   - Enable NFSoRDMA transparent state migration
   - Add support for lookup-by-filehandle
   - Add support for nfs re-exporting

  Other bugfixes and cleanups:
   - Christoph cleaned up the way we declare NFS operations
   - Clean up various internal structures
   - Various cleanups to commits
   - Various improvements to error handling
   - Set the dt_type of . and .. entries in NFS v4
   - Make slot allocation more reliable
   - Fix fscache stat printing
   - Fix uninitialized variable warnings
   - Fix potential list overrun in nfs_atomic_open()
   - Fix a race in NFSoRDMA RPC reply handler
   - Fix return size for nfs42_proc_copy()
   - Fix against MAC forgery timing attacks"

* tag 'nfs-for-4.13-1' of git://git.linux-nfs.org/projects/anna/linux-nfs: (68 commits)
  NFS: Don't run wake_up_bit() when nobody is waiting...
  nfs: add export operations
  nfs4: add NFSv4 LOOKUPP handlers
  nfs: add a nfs_ilookup helper
  nfs: replace d_add with d_splice_alias in atomic_open
  sunrpc: use constant time memory comparison for mac
  NFSv4.2 fix size storage for nfs42_proc_copy
  xprtrdma: Fix documenting comments in frwr_ops.c
  xprtrdma: Replace PAGE_MASK with offset_in_page()
  xprtrdma: FMR does not need list_del_init()
  xprtrdma: Demote "connect" log messages
  NFSv4.1: Use seqid returned by EXCHANGE_ID after state migration
  NFSv4.1: Handle EXCHGID4_FLAG_CONFIRMED_R during NFSv4.1 migration
  xprtrdma: Don't defer MR recovery if ro_map fails
  xprtrdma: Fix FRWR invalidation error recovery
  xprtrdma: Fix client lock-up after application signal fires
  xprtrdma: Rename rpcrdma_req::rl_free
  xprtrdma: Pass only the list of registered MRs to ro_unmap_sync
  xprtrdma: Pre-mark remotely invalidated MRs
  xprtrdma: On invalidation failure, remove MWs from rl_registered
  ...
This commit is contained in:
Linus Torvalds 2017-07-13 14:35:37 -07:00
commit b86faee6d1
35 changed files with 777 additions and 289 deletions

View File

@ -7,7 +7,7 @@ obj-$(CONFIG_NFS_FS) += nfs.o
CFLAGS_nfstrace.o += -I$(src)
nfs-y := client.o dir.o file.o getroot.o inode.o super.o \
io.o direct.o pagelist.o read.o symlink.o unlink.o \
write.o namespace.o mount_clnt.o nfstrace.o
write.o namespace.o mount_clnt.o nfstrace.o export.o
nfs-$(CONFIG_ROOT_NFS) += nfsroot.o
nfs-$(CONFIG_SYSCTL) += sysctl.o
nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o

View File

@ -151,7 +151,7 @@ struct nfs_cache_array {
struct nfs_cache_array_entry array[0];
};
typedef int (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, int);
typedef int (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, bool);
typedef struct {
struct file *file;
struct page *page;
@ -165,8 +165,8 @@ typedef struct {
unsigned long timestamp;
unsigned long gencount;
unsigned int cache_entry_index;
unsigned int plus:1;
unsigned int eof:1;
bool plus;
bool eof;
} nfs_readdir_descriptor_t;
/*
@ -355,7 +355,7 @@ int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc,
if (error == -ENOTSUPP && desc->plus) {
NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS;
clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
desc->plus = 0;
desc->plus = false;
goto again;
}
goto error;
@ -557,7 +557,7 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
count++;
if (desc->plus != 0)
if (desc->plus)
nfs_prime_dcache(file_dentry(desc->file), entry);
status = nfs_readdir_add_to_array(entry, page);
@ -860,7 +860,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
desc->ctx = ctx;
desc->dir_cookie = &dir_ctx->dir_cookie;
desc->decode = NFS_PROTO(inode)->decode_dirent;
desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0;
desc->plus = nfs_use_readdirplus(inode, ctx);
if (ctx->pos == 0 || nfs_attribute_cache_expired(inode))
res = nfs_revalidate_mapping(inode, file->f_mapping);
@ -885,8 +885,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
nfs_zap_caches(inode);
desc->page_index = 0;
desc->plus = 0;
desc->eof = 0;
desc->plus = false;
desc->eof = false;
continue;
}
if (res < 0)
@ -1115,11 +1115,13 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
/* Force a full look up iff the parent directory has changed */
if (!nfs_is_exclusive_create(dir, flags) &&
nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) {
if (nfs_lookup_verify_inode(inode, flags)) {
error = nfs_lookup_verify_inode(inode, flags);
if (error) {
if (flags & LOOKUP_RCU)
return -ECHILD;
goto out_zap_parent;
if (error == -ESTALE)
goto out_zap_parent;
goto out_error;
}
nfs_advise_use_readdirplus(dir);
goto out_valid;
@ -1144,8 +1146,10 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
trace_nfs_lookup_revalidate_enter(dir, dentry, flags);
error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
trace_nfs_lookup_revalidate_exit(dir, dentry, flags, error);
if (error)
if (error == -ESTALE || error == -ENOENT)
goto out_bad;
if (error)
goto out_error;
if (nfs_compare_fh(NFS_FH(inode), fhandle))
goto out_bad;
if ((error = nfs_refresh_inode(inode, fattr)) != 0)
@ -1427,8 +1431,10 @@ static int nfs_finish_open(struct nfs_open_context *ctx,
err = finish_open(file, dentry, do_open, opened);
if (err)
goto out;
nfs_file_set_open_context(file, ctx);
if (S_ISREG(file->f_path.dentry->d_inode->i_mode))
nfs_file_set_open_context(file, ctx);
else
err = -ESTALE;
out:
return err;
}
@ -1512,7 +1518,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
d_drop(dentry);
switch (err) {
case -ENOENT:
d_add(dentry, NULL);
d_splice_alias(NULL, dentry);
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
break;
case -EISDIR:
@ -2035,7 +2041,11 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
}
error = rpc_wait_for_completion_task(task);
if (error == 0)
if (error != 0) {
((struct nfs_renamedata *)task->tk_calldata)->cancelled = 1;
/* Paired with the atomic_dec_and_test() barrier in rpc_do_put_task() */
smp_wmb();
} else
error = task->tk_status;
rpc_put_task(task);
nfs_mark_for_revalidate(old_inode);

177
fs/nfs/export.c Normal file
View File

@ -0,0 +1,177 @@
/*
* Copyright (c) 2015, Primary Data, Inc. All rights reserved.
*
* Tao Peng <bergwolf@primarydata.com>
*/
#include <linux/dcache.h>
#include <linux/exportfs.h>
#include <linux/nfs.h>
#include <linux/nfs_fs.h>
#include "internal.h"
#include "nfstrace.h"
#define NFSDBG_FACILITY NFSDBG_VFS
/*
 * Word offsets (in 32-bit units) of the fields that make up an NFS export
 * file handle, as written by nfs_encode_fh() and read by nfs_fh_to_dentry().
 */
enum {
FILEID_HIGH_OFF = 0, /* upper 32 bits of the inode fileid */
FILEID_LOW_OFF, /* lower 32 bits of the inode fileid */
FILE_I_TYPE_OFF, /* inode type (i_mode & S_IFMT) */
EMBED_FH_OFF /* start of the embedded server fh */
};
/*
 * Return a pointer to the server file handle embedded in the export handle
 * @p. The embedded handle starts at word EMBED_FH_OFF.
 */
static struct nfs_fh *nfs_exp_embedfh(__u32 *p)
{
	__u32 *embedded = &p[EMBED_FH_OFF];

	return (struct nfs_fh *)embedded;
}
/*
 * Encode an export file handle for @inode into the word array @p.
 *
 * The handle consists of the inode fileid (high word, low word), the inode
 * type, and then the server file handle copied in with nfs_copy_fh(). The
 * last word is zeroed first so that any padding bytes are well defined.
 *
 * @max_len: on entry, the number of 32-bit words available in @p; on return,
 *           the number of words required (on failure) or used (on success).
 * @parent:  only logged. Let's break subtree checking for now... otherwise
 *           we'll have to embed the parent fh, but there might not be enough
 *           space.
 *
 * Returns the handle length in words (which doubles as the handle type), or
 * FILEID_INVALID if @p is too small or @inode is an automount point.
 */
static int
nfs_encode_fh(struct inode *inode, __u32 *p, int *max_len, struct inode *parent)
{
	struct nfs_fh *server_fh = NFS_FH(inode);
	struct nfs_fh *clnt_fh = nfs_exp_embedfh(p);
	size_t fh_size = offsetof(struct nfs_fh, data) + server_fh->size;
	int len = EMBED_FH_OFF + XDR_QUADLEN(fh_size);

	dprintk("%s: max fh len %d inode %p parent %p\n",
		__func__, *max_len, inode, parent);

	if (*max_len < len) {
		dprintk("%s: fh len %d too small, required %d\n",
			__func__, *max_len, len);
		*max_len = len;
		return FILEID_INVALID;
	}

	/*
	 * Automount points are never encoded. This used to share the
	 * "too small" branch above (with a misleading message), which left a
	 * second, unreachable IS_AUTOMOUNT() check behind it.
	 */
	if (IS_AUTOMOUNT(inode)) {
		dprintk("%s: not encoding automount inode %p\n",
			__func__, inode);
		*max_len = len;
		return FILEID_INVALID;
	}

	p[FILEID_HIGH_OFF] = NFS_FILEID(inode) >> 32;
	p[FILEID_LOW_OFF] = NFS_FILEID(inode);
	p[FILE_I_TYPE_OFF] = inode->i_mode & S_IFMT;
	p[len - 1] = 0; /* Padding */
	nfs_copy_fh(clnt_fh, server_fh);
	*max_len = len;

	dprintk("%s: result fh fileid %llu mode %u size %d\n",
		__func__, NFS_FILEID(inode), inode->i_mode, *max_len);
	return len;
}
/*
 * Convert an export file handle (as produced by nfs_encode_fh()) back into
 * a dentry.
 *
 * @sb:      superblock the handle belongs to
 * @fid:     raw handle words
 * @fh_len:  number of valid 32-bit words in @fid
 * @fh_type: handle type; must equal the handle length computed from the
 *           embedded server file handle
 *
 * The inode cache is searched first via nfs_ilookup(); on a miss the
 * attributes are fetched with the getattr RPC op and the inode is
 * instantiated with nfs_fhget().
 *
 * Returns a dentry from d_obtain_alias(), an ERR_PTR() on allocation or RPC
 * failure, or NULL (which the caller translates to ESTALE) when the handle
 * is malformed.
 */
static struct dentry *
nfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
		 int fh_len, int fh_type)
{
	struct nfs4_label *label = NULL;
	struct nfs_fattr *fattr = NULL;
	struct nfs_fh *server_fh = nfs_exp_embedfh(fid->raw);
	const struct nfs_rpc_ops *rpc_ops;
	struct dentry *dentry;
	struct inode *inode;
	size_t fh_size;
	u32 *p = fid->raw;
	int len;
	int ret;

	/*
	 * The handle comes from outside, so do not look at the embedded fh
	 * until the handle is known to extend past the fixed header words.
	 * NOTE(review): assumes the size field of struct nfs_fh lies within
	 * the first embedded word - confirm against the struct definition.
	 */
	if (fh_len <= EMBED_FH_OFF)
		return NULL;

	fh_size = offsetof(struct nfs_fh, data) + server_fh->size;
	len = EMBED_FH_OFF + XDR_QUADLEN(fh_size);

	/* NULL translates to ESTALE */
	if (fh_len < len || fh_type != len)
		return NULL;

	fattr = nfs_alloc_fattr();
	if (fattr == NULL) {
		dentry = ERR_PTR(-ENOMEM);
		goto out;
	}

	/* Seed fattr with just enough for the inode cache lookup. */
	fattr->fileid = ((u64)p[FILEID_HIGH_OFF] << 32) + p[FILEID_LOW_OFF];
	fattr->mode = p[FILE_I_TYPE_OFF];
	fattr->valid |= NFS_ATTR_FATTR_FILEID | NFS_ATTR_FATTR_TYPE;

	dprintk("%s: fileid %llu mode %d\n", __func__, fattr->fileid, fattr->mode);

	inode = nfs_ilookup(sb, fattr, server_fh);
	if (inode)
		goto out_found;

	/* Cache miss: fetch the attributes and instantiate the inode. */
	label = nfs4_label_alloc(NFS_SB(sb), GFP_KERNEL);
	if (IS_ERR(label)) {
		dentry = ERR_CAST(label);
		goto out_free_fattr;
	}

	rpc_ops = NFS_SB(sb)->nfs_client->rpc_ops;
	ret = rpc_ops->getattr(NFS_SB(sb), server_fh, fattr, label);
	if (ret) {
		dprintk("%s: getattr failed %d\n", __func__, ret);
		dentry = ERR_PTR(ret);
		goto out_free_label;
	}

	inode = nfs_fhget(sb, server_fh, fattr, label);

out_found:
	dentry = d_obtain_alias(inode);

out_free_label:
	nfs4_label_free(label);
out_free_fattr:
	nfs_free_fattr(fattr);
out:
	return dentry;
}
static struct dentry *
nfs_get_parent(struct dentry *dentry)
{
int ret;
struct inode *inode = d_inode(dentry), *pinode;
struct super_block *sb = inode->i_sb;
struct nfs_server *server = NFS_SB(sb);
struct nfs_fattr *fattr = NULL;
struct nfs4_label *label = NULL;
struct dentry *parent;
struct nfs_rpc_ops const *ops = server->nfs_client->rpc_ops;
struct nfs_fh fh;
if (!ops->lookupp)
return ERR_PTR(-EACCES);
fattr = nfs_alloc_fattr();
if (fattr == NULL) {
parent = ERR_PTR(-ENOMEM);
goto out;
}
label = nfs4_label_alloc(server, GFP_KERNEL);
if (IS_ERR(label)) {
parent = ERR_CAST(label);
goto out_free_fattr;
}
ret = ops->lookupp(inode, &fh, fattr, label);
if (ret) {
parent = ERR_PTR(ret);
goto out_free_label;
}
pinode = nfs_fhget(sb, &fh, fattr, label);
parent = d_obtain_alias(pinode);
out_free_label:
nfs4_label_free(label);
out_free_fattr:
nfs_free_fattr(fattr);
out:
return parent;
}
/*
 * Export operations for NFS superblocks: file handle encoding, file handle
 * to dentry conversion, and parent lookup, all implemented in this file.
 */
const struct export_operations nfs_export_ops = {
.encode_fh = nfs_encode_fh,
.fh_to_dentry = nfs_fh_to_dentry,
.get_parent = nfs_get_parent,
};

View File

@ -126,32 +126,13 @@ static int filelayout_async_handle_error(struct rpc_task *task,
{
struct pnfs_layout_hdr *lo = lseg->pls_layout;
struct inode *inode = lo->plh_inode;
struct nfs_server *mds_server = NFS_SERVER(inode);
struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
struct nfs_client *mds_client = mds_server->nfs_client;
struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
if (task->tk_status >= 0)
return 0;
switch (task->tk_status) {
/* MDS state errors */
case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
case -NFS4ERR_OPENMODE:
if (state == NULL)
break;
if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
goto out_bad_stateid;
goto wait_on_recovery;
case -NFS4ERR_EXPIRED:
if (state != NULL) {
if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
goto out_bad_stateid;
}
nfs4_schedule_lease_recovery(mds_client);
goto wait_on_recovery;
/* DS session errors */
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
@ -172,6 +153,7 @@ static int filelayout_async_handle_error(struct rpc_task *task,
case -NFS4ERR_RETRY_UNCACHED_REP:
break;
/* Invalidate Layout errors */
case -NFS4ERR_ACCESS:
case -NFS4ERR_PNFS_NO_LAYOUT:
case -ESTALE: /* mapped NFS4ERR_STALE */
case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */
@ -202,26 +184,17 @@ static int filelayout_async_handle_error(struct rpc_task *task,
task->tk_status);
nfs4_mark_deviceid_unavailable(devid);
pnfs_error_mark_layout_for_return(inode, lseg);
pnfs_set_lo_fail(lseg);
rpc_wake_up(&tbl->slot_tbl_waitq);
/* fall through */
default:
pnfs_set_lo_fail(lseg);
reset:
dprintk("%s Retry through MDS. Error %d\n", __func__,
task->tk_status);
return -NFS4ERR_RESET_TO_MDS;
}
out:
task->tk_status = 0;
return -EAGAIN;
out_bad_stateid:
task->tk_status = -EIO;
return 0;
wait_on_recovery:
rpc_sleep_on(&mds_client->cl_rpcwaitq, task, NULL);
if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0)
rpc_wake_up_queued_task(&mds_client->cl_rpcwaitq, task);
goto out;
}
/* NFS_PROTO call done callback routines */

View File

@ -1050,34 +1050,10 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
{
struct pnfs_layout_hdr *lo = lseg->pls_layout;
struct inode *inode = lo->plh_inode;
struct nfs_server *mds_server = NFS_SERVER(inode);
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
struct nfs_client *mds_client = mds_server->nfs_client;
struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
switch (task->tk_status) {
/* MDS state errors */
case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
if (state == NULL)
break;
nfs_remove_bad_delegation(state->inode, NULL);
case -NFS4ERR_OPENMODE:
if (state == NULL)
break;
if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
goto out_bad_stateid;
goto wait_on_recovery;
case -NFS4ERR_EXPIRED:
if (state != NULL) {
if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
goto out_bad_stateid;
}
nfs4_schedule_lease_recovery(mds_client);
goto wait_on_recovery;
/* DS session errors */
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
case -NFS4ERR_BAD_HIGH_SLOT:
@ -1137,17 +1113,8 @@ reset:
task->tk_status);
return -NFS4ERR_RESET_TO_MDS;
}
out:
task->tk_status = 0;
return -EAGAIN;
out_bad_stateid:
task->tk_status = -EIO;
return 0;
wait_on_recovery:
rpc_sleep_on(&mds_client->cl_rpcwaitq, task, NULL);
if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0)
rpc_wake_up_queued_task(&mds_client->cl_rpcwaitq, task);
goto out;
}
/* Retry all errors through either pNFS or MDS except for -EJUKEBOX */

View File

@ -386,6 +386,28 @@ void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr,
#endif
EXPORT_SYMBOL_GPL(nfs_setsecurity);
/*
 * Search the inode cache of @sb for an inode matching @fh together with the
 * fileid and type carried in @fattr.
 *
 * Both NFS_ATTR_FATTR_FILEID and NFS_ATTR_FATTR_TYPE must be set in
 * fattr->valid; otherwise no lookup is attempted and NULL is returned.
 * Returns whatever ilookup5() returns for the hash derived from @fattr.
 */
struct inode *
nfs_ilookup(struct super_block *sb, struct nfs_fattr *fattr, struct nfs_fh *fh)
{
	struct nfs_find_desc desc = {
		.fattr	= fattr,
		.fh	= fh,
	};
	struct inode *found;
	unsigned long ino_hash;

	if (!(fattr->valid & NFS_ATTR_FATTR_FILEID))
		return NULL;
	if (!(fattr->valid & NFS_ATTR_FATTR_TYPE))
		return NULL;

	ino_hash = nfs_fattr_to_ino_t(fattr);
	found = ilookup5(sb, ino_hash, nfs_find_actor, &desc);

	dprintk("%s: returning %p\n", __func__, found);
	return found;
}
/*
* This is our front-end to iget that looks up inodes by file handle
* instead of inode number.
@ -525,8 +547,14 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
nfs_fscache_init_inode(inode);
unlock_new_inode(inode);
} else
nfs_refresh_inode(inode, fattr);
} else {
int err = nfs_refresh_inode(inode, fattr);
if (err < 0) {
iput(inode);
inode = ERR_PTR(err);
goto out_no_inode;
}
}
dprintk("NFS: nfs_fhget(%s/%Lu fh_crc=0x%08x ct=%d)\n",
inode->i_sb->s_id,
(unsigned long long)NFS_FILEID(inode),
@ -1315,9 +1343,9 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
return 0;
/* Has the inode gone and changed behind our back? */
if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid)
return -EIO;
return -ESTALE;
if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
return -EIO;
return -ESTALE;
if (!nfs_file_has_buffered_writers(nfsi)) {
/* Verify a few of the more important attributes */

View File

@ -11,6 +11,8 @@
#define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS)
extern const struct export_operations nfs_export_ops;
struct nfs_string;
/* Maximum number of readahead requests
@ -273,17 +275,17 @@ static inline bool nfs_match_open_context(const struct nfs_open_context *ctx1,
/* nfs2xdr.c */
extern const struct rpc_procinfo nfs_procedures[];
extern int nfs2_decode_dirent(struct xdr_stream *,
struct nfs_entry *, int);
struct nfs_entry *, bool);
/* nfs3xdr.c */
extern const struct rpc_procinfo nfs3_procedures[];
extern int nfs3_decode_dirent(struct xdr_stream *,
struct nfs_entry *, int);
struct nfs_entry *, bool);
/* nfs4xdr.c */
#if IS_ENABLED(CONFIG_NFS_V4)
extern int nfs4_decode_dirent(struct xdr_stream *,
struct nfs_entry *, int);
struct nfs_entry *, bool);
#endif
#ifdef CONFIG_NFS_V4_1
extern const u32 nfs41_maxread_overhead;

View File

@ -939,7 +939,7 @@ static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr,
* };
*/
int nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
int plus)
bool plus)
{
__be32 *p;
int error;

View File

@ -621,7 +621,7 @@ out:
*/
static int
nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
u64 cookie, struct page **pages, unsigned int count, int plus)
u64 cookie, struct page **pages, unsigned int count, bool plus)
{
struct inode *dir = d_inode(dentry);
__be32 *verf = NFS_I(dir)->cookieverf;

View File

@ -1991,7 +1991,7 @@ out_status:
* };
*/
int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
int plus)
bool plus)
{
struct nfs_entry old = *entry;
__be32 *p;

View File

@ -146,7 +146,7 @@ static ssize_t _nfs42_proc_copy(struct file *src,
loff_t pos_src = args->src_pos;
loff_t pos_dst = args->dst_pos;
size_t count = args->count;
int status;
ssize_t status;
status = nfs4_set_rw_stateid(&args->src_stateid, src_lock->open_context,
src_lock, FMODE_READ);

View File

@ -414,6 +414,7 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
if (clp != old)
clp->cl_preserve_clid = true;
nfs_put_client(clp);
clear_bit(NFS_CS_TSM_POSSIBLE, &clp->cl_flags);
return old;
error:
@ -852,6 +853,8 @@ static int nfs4_set_client(struct nfs_server *server,
set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
if (server->options & NFS_OPTION_MIGRATION)
set_bit(NFS_CS_MIGRATION, &cl_init.init_flags);
if (test_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status))
set_bit(NFS_CS_TSM_POSSIBLE, &cl_init.init_flags);
/* Allocate or find a client reference we can use */
clp = nfs_get_client(&cl_init);
@ -1212,9 +1215,11 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname,
return -EAFNOSUPPORT;
nfs_server_remove_lists(server);
set_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status);
error = nfs4_set_client(server, hostname, sap, salen, buf,
clp->cl_proto, clnt->cl_timeout,
clp->cl_minorversion, net);
clear_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status);
nfs_put_client(clp);
if (error != 0) {
nfs_server_insert_lists(server);

View File

@ -364,7 +364,8 @@ static int nfs_idmap_lookup_id(const char *name, size_t namelen, const char *typ
ret = -EINVAL;
} else {
ret = kstrtol(id_str, 10, &id_long);
*id = (__u32)id_long;
if (!ret)
*id = (__u32)id_long;
}
return ret;
}

View File

@ -275,6 +275,7 @@ const u32 nfs4_fs_locations_bitmap[3] = {
static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dentry,
struct nfs4_readdir_arg *readdir)
{
unsigned int attrs = FATTR4_WORD0_FILEID | FATTR4_WORD0_TYPE;
__be32 *start, *p;
if (cookie > 2) {
@ -305,8 +306,9 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
memcpy(p, ".\0\0\0", 4); /* entry */
p++;
*p++ = xdr_one; /* bitmap length */
*p++ = htonl(FATTR4_WORD0_FILEID); /* bitmap */
*p++ = htonl(8); /* attribute buffer length */
*p++ = htonl(attrs); /* bitmap */
*p++ = htonl(12); /* attribute buffer length */
*p++ = htonl(NF4DIR);
p = xdr_encode_hyper(p, NFS_FILEID(d_inode(dentry)));
}
@ -317,8 +319,9 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
memcpy(p, "..\0\0", 4); /* entry */
p++;
*p++ = xdr_one; /* bitmap length */
*p++ = htonl(FATTR4_WORD0_FILEID); /* bitmap */
*p++ = htonl(8); /* attribute buffer length */
*p++ = htonl(attrs); /* bitmap */
*p++ = htonl(12); /* attribute buffer length */
*p++ = htonl(NF4DIR);
p = xdr_encode_hyper(p, NFS_FILEID(d_inode(dentry->d_parent)));
readdir->pgbase = (char *)p - (char *)start;
@ -1034,11 +1037,11 @@ struct nfs4_opendata {
struct nfs4_state *state;
struct iattr attrs;
unsigned long timestamp;
unsigned int rpc_done : 1;
unsigned int file_created : 1;
unsigned int is_recover : 1;
bool rpc_done;
bool file_created;
bool is_recover;
bool cancelled;
int rpc_status;
int cancelled;
};
static bool nfs4_clear_cap_atomic_open_v1(struct nfs_server *server,
@ -1962,7 +1965,7 @@ static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata)
nfs4_stateid_copy(&data->o_res.stateid, &data->c_res.stateid);
nfs_confirm_seqid(&data->owner->so_seqid, 0);
renew_lease(data->o_res.server, data->timestamp);
data->rpc_done = 1;
data->rpc_done = true;
}
}
@ -1972,7 +1975,7 @@ static void nfs4_open_confirm_release(void *calldata)
struct nfs4_state *state = NULL;
/* If this request hasn't been cancelled, do nothing */
if (data->cancelled == 0)
if (!data->cancelled)
goto out_free;
/* In case of error, no cleanup! */
if (!data->rpc_done)
@ -2015,7 +2018,7 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
nfs4_init_sequence(&data->c_arg.seq_args, &data->c_res.seq_res, 1);
kref_get(&data->kref);
data->rpc_done = 0;
data->rpc_done = false;
data->rpc_status = 0;
data->timestamp = jiffies;
if (data->is_recover)
@ -2025,7 +2028,7 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
return PTR_ERR(task);
status = rpc_wait_for_completion_task(task);
if (status != 0) {
data->cancelled = 1;
data->cancelled = true;
smp_wmb();
} else
status = data->rpc_status;
@ -2124,7 +2127,7 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata)
if (!(data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM))
nfs_confirm_seqid(&data->owner->so_seqid, 0);
}
data->rpc_done = 1;
data->rpc_done = true;
}
static void nfs4_open_release(void *calldata)
@ -2133,7 +2136,7 @@ static void nfs4_open_release(void *calldata)
struct nfs4_state *state = NULL;
/* If this request hasn't been cancelled, do nothing */
if (data->cancelled == 0)
if (!data->cancelled)
goto out_free;
/* In case of error, no cleanup! */
if (data->rpc_status != 0 || !data->rpc_done)
@ -2179,20 +2182,20 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover)
nfs4_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1);
kref_get(&data->kref);
data->rpc_done = 0;
data->rpc_done = false;
data->rpc_status = 0;
data->cancelled = 0;
data->is_recover = 0;
data->cancelled = false;
data->is_recover = false;
if (isrecover) {
nfs4_set_sequence_privileged(&o_arg->seq_args);
data->is_recover = 1;
data->is_recover = true;
}
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
status = rpc_wait_for_completion_task(task);
if (status != 0) {
data->cancelled = 1;
data->cancelled = true;
smp_wmb();
} else
status = data->rpc_status;
@ -2287,9 +2290,9 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
if (o_arg->open_flags & O_CREAT) {
if (o_arg->open_flags & O_EXCL)
data->file_created = 1;
data->file_created = true;
else if (o_res->cinfo.before != o_res->cinfo.after)
data->file_created = 1;
data->file_created = true;
if (data->file_created || dir->i_version != o_res->cinfo.after)
update_changeattr(dir, &o_res->cinfo,
o_res->f_attr->time_start);
@ -3803,6 +3806,54 @@ nfs4_proc_lookup_mountpoint(struct inode *dir, const struct qstr *name,
return (client == NFS_CLIENT(dir)) ? rpc_clone_client(client) : client;
}
/*
 * Issue a single synchronous LOOKUPP call for @inode.
 *
 * On success @fhandle, @fattr and @label are filled in from the reply.
 * Returns the status from nfs4_call_sync(); retry and exception handling is
 * left to the caller.
 */
static int _nfs4_proc_lookupp(struct inode *inode,
		struct nfs_fh *fhandle, struct nfs_fattr *fattr,
		struct nfs4_label *label)
{
	struct nfs_server *server = NFS_SERVER(inode);
	struct rpc_clnt *clnt = NFS_CLIENT(inode);
	struct nfs4_lookupp_arg args = {
		.bitmask = nfs4_bitmask(server, label),
		.fh = NFS_FH(inode),
	};
	struct nfs4_lookupp_res res = {
		.server = server,
		.fattr = fattr,
		.label = label,
		.fh = fhandle,
	};
	struct rpc_message msg = {
		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUPP],
		.rpc_argp = &args,
		.rpc_resp = &res,
	};
	int status;

	nfs_fattr_init(fattr);

	dprintk("NFS call lookupp ino=0x%lx\n", inode->i_ino);
	status = nfs4_call_sync(clnt, server, &msg, &args.seq_args,
				&res.seq_res, 0);
	dprintk("NFS reply lookupp: %d\n", status);
	return status;
}
/*
 * LOOKUPP entry point: call _nfs4_proc_lookupp(), trace the result, and pass
 * it through nfs4_handle_exception(), repeating for as long as the exception
 * state asks for a retry. Returns the status left by the final pass.
 */
static int nfs4_proc_lookupp(struct inode *inode, struct nfs_fh *fhandle,
			     struct nfs_fattr *fattr, struct nfs4_label *label)
{
	struct nfs_server *server = NFS_SERVER(inode);
	struct nfs4_exception exception = { };
	int status;

	do {
		status = _nfs4_proc_lookupp(inode, fhandle, fattr, label);
		trace_nfs4_lookupp(inode, status);
		status = nfs4_handle_exception(server, status, &exception);
	} while (exception.retry);

	return status;
}
static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
{
struct nfs_server *server = NFS_SERVER(inode);
@ -4273,7 +4324,7 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
}
static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
u64 cookie, struct page **pages, unsigned int count, int plus)
u64 cookie, struct page **pages, unsigned int count, bool plus)
{
struct inode *dir = d_inode(dentry);
struct nfs4_readdir_arg args = {
@ -4311,7 +4362,7 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
}
static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
u64 cookie, struct page **pages, unsigned int count, int plus)
u64 cookie, struct page **pages, unsigned int count, bool plus)
{
struct nfs4_exception exception = { };
int err;
@ -6135,7 +6186,7 @@ static void nfs4_lock_release(void *calldata)
dprintk("%s: begin!\n", __func__);
nfs_free_seqid(data->arg.open_seqid);
if (data->cancelled != 0) {
if (data->cancelled) {
struct rpc_task *task;
task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp,
data->arg.lock_seqid);
@ -6218,7 +6269,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
nfs4_handle_setlk_error(data->server, data->lsp,
data->arg.new_lock_owner, ret);
} else
data->cancelled = 1;
data->cancelled = true;
rpc_put_task(task);
dprintk("%s: done, ret = %d!\n", __func__, ret);
trace_nfs4_set_lock(fl, state, &data->res.stateid, cmd, ret);
@ -7376,12 +7427,11 @@ static void nfs4_exchange_id_done(struct rpc_task *task, void *data)
if (status == 0) {
clp->cl_clientid = cdata->res.clientid;
clp->cl_exchange_flags = cdata->res.flags;
clp->cl_seqid = cdata->res.seqid;
/* Client ID is not confirmed */
if (!(cdata->res.flags & EXCHGID4_FLAG_CONFIRMED_R)) {
if (!(cdata->res.flags & EXCHGID4_FLAG_CONFIRMED_R))
clear_bit(NFS4_SESSION_ESTABLISHED,
&clp->cl_session->session_state);
clp->cl_seqid = cdata->res.seqid;
}
&clp->cl_session->session_state);
kfree(clp->cl_serverowner);
clp->cl_serverowner = cdata->res.server_owner;
@ -9313,6 +9363,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
.getattr = nfs4_proc_getattr,
.setattr = nfs4_proc_setattr,
.lookup = nfs4_proc_lookup,
.lookupp = nfs4_proc_lookupp,
.access = nfs4_proc_access,
.readlink = nfs4_proc_readlink,
.create = nfs4_proc_create,

View File

@ -352,11 +352,17 @@ int nfs41_discover_server_trunking(struct nfs_client *clp,
if (clp != *result)
return 0;
/* Purge state if the client id was established in a prior instance */
if (clp->cl_exchange_flags & EXCHGID4_FLAG_CONFIRMED_R)
set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
else
set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
/*
* Purge state if the client id was established in a prior
* instance and the client id could not have arrived on the
* server via Transparent State Migration.
*/
if (clp->cl_exchange_flags & EXCHGID4_FLAG_CONFIRMED_R) {
if (!test_bit(NFS_CS_TSM_POSSIBLE, &clp->cl_flags))
set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
else
set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
}
nfs4_schedule_state_manager(clp);
status = nfs_wait_client_init_complete(clp);
if (status < 0)

View File

@ -891,6 +891,35 @@ DEFINE_NFS4_LOOKUP_EVENT(nfs4_remove);
DEFINE_NFS4_LOOKUP_EVENT(nfs4_get_fs_locations);
DEFINE_NFS4_LOOKUP_EVENT(nfs4_secinfo);
/*
 * Tracepoint for LOOKUPP: records the device number of the inode's
 * superblock, the inode's NFS fileid, and the error code passed in.
 * Printed as "error=<n> (<name>) inode=<major>:<minor>:<fileid>".
 */
TRACE_EVENT(nfs4_lookupp,
TP_PROTO(
const struct inode *inode,
int error
),
TP_ARGS(inode, error),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(u64, ino)
__field(int, error)
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = NFS_FILEID(inode);
__entry->error = error;
),
TP_printk(
"error=%d (%s) inode=%02x:%02x:%llu",
__entry->error,
show_nfsv4_errors(__entry->error),
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->ino
)
);
TRACE_EVENT(nfs4_rename,
TP_PROTO(
const struct inode *olddir,

View File

@ -159,6 +159,8 @@ static int nfs4_stat_to_errno(int);
(op_decode_hdr_maxsz)
#define encode_lookup_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz)
#define decode_lookup_maxsz (op_decode_hdr_maxsz)
#define encode_lookupp_maxsz (op_encode_hdr_maxsz)
#define decode_lookupp_maxsz (op_decode_hdr_maxsz)
#define encode_share_access_maxsz \
(2)
#define encode_createmode_maxsz (1 + encode_attrs_maxsz + encode_verifier_maxsz)
@ -618,6 +620,18 @@ static int nfs4_stat_to_errno(int);
decode_lookup_maxsz + \
decode_getattr_maxsz + \
decode_getfh_maxsz)
#define NFS4_enc_lookupp_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
encode_lookupp_maxsz + \
encode_getattr_maxsz + \
encode_getfh_maxsz)
#define NFS4_dec_lookupp_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_putfh_maxsz + \
decode_lookupp_maxsz + \
decode_getattr_maxsz + \
decode_getfh_maxsz)
#define NFS4_enc_lookup_root_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putrootfh_maxsz + \
@ -1368,6 +1382,11 @@ static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struc
encode_string(xdr, name->len, name->name);
}
/*
 * Encode a LOOKUPP operation. Only the operation header is emitted; no
 * arguments follow it. decode_lookupp_maxsz is handed to encode_op_hdr()
 * alongside the opcode.
 */
static void encode_lookupp(struct xdr_stream *xdr, struct compound_hdr *hdr)
{
encode_op_hdr(xdr, OP_LOOKUPP, decode_lookupp_maxsz, hdr);
}
static void encode_share_access(struct xdr_stream *xdr, u32 share_access)
{
__be32 *p;
@ -2122,6 +2141,26 @@ static void nfs4_xdr_enc_lookup(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_nops(&hdr);
}
/*
* Encode LOOKUPP request
*/
static void nfs4_xdr_enc_lookupp(struct rpc_rqst *req, struct xdr_stream *xdr,
const void *data)
{
const struct nfs4_lookupp_arg *args = data;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
encode_compound_hdr(xdr, req, &hdr);
encode_sequence(xdr, &args->seq_args, &hdr);
encode_putfh(xdr, args->fh, &hdr);
encode_lookupp(xdr, &hdr);
encode_getfh(xdr, &hdr);
encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
}
/*
* Encode LOOKUP_ROOT request
*/
@ -5058,6 +5097,11 @@ static int decode_lookup(struct xdr_stream *xdr)
return decode_op_hdr(xdr, OP_LOOKUP);
}
/*
 * Decode the LOOKUPP result from @xdr.
 * Returns whatever decode_op_hdr() reports for OP_LOOKUPP; there is no
 * further payload to parse here.
 */
static int decode_lookupp(struct xdr_stream *xdr)
{
return decode_op_hdr(xdr, OP_LOOKUPP);
}
/* This is too sick! */
static int decode_space_limit(struct xdr_stream *xdr,
unsigned long *pagemod_limit)
@ -6237,6 +6281,36 @@ out:
return status;
}
/*
 * Decode LOOKUPP response
 *
 * @rqstp: RPC request, handed to decode_sequence()
 * @xdr:   stream holding the reply
 * @data:  the caller's struct nfs4_lookupp_res
 *
 * The reply is walked in order: compound header, SEQUENCE, PUTFH,
 * LOOKUPP, GETFH (into res->fh) and finally GETATTR (into res->fattr
 * and res->label). Decoding stops at the first step that reports a
 * non-zero status and that status is returned; otherwise the status
 * of the attribute decode is returned.
 */
static int nfs4_xdr_dec_lookupp(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
				void *data)
{
	struct nfs4_lookupp_res *res = data;
	struct compound_hdr hdr;
	int status;

	status = decode_compound_hdr(xdr, &hdr);
	if (status != 0)
		return status;

	status = decode_sequence(xdr, &res->seq_res, rqstp);
	if (status != 0)
		return status;

	status = decode_putfh(xdr);
	if (status != 0)
		return status;

	status = decode_lookupp(xdr);
	if (status != 0)
		return status;

	status = decode_getfh(xdr, res->fh);
	if (status != 0)
		return status;

	return decode_getfattr_label(xdr, res->fattr, res->label, res->server);
}
/*
* Decode LOOKUP_ROOT response
*/
@ -7447,7 +7521,7 @@ out:
* on a directory already in our cache.
*/
int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
int plus)
bool plus)
{
unsigned int savep;
uint32_t bitmap[3] = {0};
@ -7614,6 +7688,7 @@ const struct rpc_procinfo nfs4_procedures[] = {
PROC(ACCESS, enc_access, dec_access),
PROC(GETATTR, enc_getattr, dec_getattr),
PROC(LOOKUP, enc_lookup, dec_lookup),
PROC(LOOKUPP, enc_lookupp, dec_lookupp),
PROC(LOOKUP_ROOT, enc_lookup_root, dec_lookup_root),
PROC(REMOVE, enc_remove, dec_remove),
PROC(RENAME, enc_rename, dec_rename),

View File

@ -50,8 +50,8 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
hdr->cred = hdr->req->wb_context->cred;
hdr->io_start = req_offset(hdr->req);
hdr->good_bytes = mirror->pg_count;
hdr->io_completion = desc->pg_io_completion;
hdr->dreq = desc->pg_dreq;
hdr->layout_private = desc->pg_layout_private;
hdr->release = release;
hdr->completion_ops = desc->pg_completion_ops;
if (hdr->completion_ops->init_hdr)
@ -155,9 +155,12 @@ nfs_page_group_lock(struct nfs_page *req, bool nonblock)
if (!test_and_set_bit(PG_HEADLOCK, &head->wb_flags))
return 0;
if (!nonblock)
if (!nonblock) {
set_bit(PG_CONTENDED1, &head->wb_flags);
smp_mb__after_atomic();
return wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
TASK_UNINTERRUPTIBLE);
}
return -EAGAIN;
}
@ -175,6 +178,10 @@ nfs_page_group_lock_wait(struct nfs_page *req)
WARN_ON_ONCE(head != head->wb_head);
if (!test_bit(PG_HEADLOCK, &head->wb_flags))
return;
set_bit(PG_CONTENDED1, &head->wb_flags);
smp_mb__after_atomic();
wait_on_bit(&head->wb_flags, PG_HEADLOCK,
TASK_UNINTERRUPTIBLE);
}
@ -193,6 +200,8 @@ nfs_page_group_unlock(struct nfs_page *req)
smp_mb__before_atomic();
clear_bit(PG_HEADLOCK, &head->wb_flags);
smp_mb__after_atomic();
if (!test_bit(PG_CONTENDED1, &head->wb_flags))
return;
wake_up_bit(&head->wb_flags, PG_HEADLOCK);
}
@ -383,6 +392,8 @@ void nfs_unlock_request(struct nfs_page *req)
smp_mb__before_atomic();
clear_bit(PG_BUSY, &req->wb_flags);
smp_mb__after_atomic();
if (!test_bit(PG_CONTENDED2, &req->wb_flags))
return;
wake_up_bit(&req->wb_flags, PG_BUSY);
}
@ -465,6 +476,10 @@ void nfs_release_request(struct nfs_page *req)
/*
 * Wait for PG_BUSY to be cleared on @req.
 *
 * Returns 0 at once when PG_BUSY is not set; otherwise returns the
 * result of wait_on_bit_io().
 */
int
nfs_wait_on_request(struct nfs_page *req)
{
/* Fast path: nothing to wait for */
if (!test_bit(PG_BUSY, &req->wb_flags))
return 0;
/* Flag the request as contended before sleeping: nfs_unlock_request()
 * only calls wake_up_bit() when PG_CONTENDED2 is set.
 */
set_bit(PG_CONTENDED2, &req->wb_flags);
/* NOTE(review): barrier presumably orders the set_bit() above against
 * the PG_BUSY test done by the wait below - confirm.
 */
smp_mb__after_atomic();
return wait_on_bit_io(&req->wb_flags, PG_BUSY,
TASK_UNINTERRUPTIBLE);
}
@ -710,8 +725,8 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
desc->pg_ioflags = io_flags;
desc->pg_error = 0;
desc->pg_lseg = NULL;
desc->pg_io_completion = NULL;
desc->pg_dreq = NULL;
desc->pg_layout_private = NULL;
desc->pg_bsize = bsize;
desc->pg_mirror_count = 1;
@ -779,6 +794,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
gfp_t gfp_flags = GFP_KERNEL;
pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count);
pg_array->npages = pagecount;
if (pagecount <= ARRAY_SIZE(pg_array->page_array))
pg_array->pagevec = pg_array->page_array;
@ -1233,6 +1249,7 @@ int nfs_pageio_resend(struct nfs_pageio_descriptor *desc,
{
LIST_HEAD(failed);
desc->pg_io_completion = hdr->io_completion;
desc->pg_dreq = hdr->dreq;
while (!list_empty(&hdr->pages)) {
struct nfs_page *req = nfs_list_entry(hdr->pages.next);

View File

@ -485,7 +485,7 @@ nfs_proc_rmdir(struct inode *dir, const struct qstr *name)
*/
static int
nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
u64 cookie, struct page **pages, unsigned int count, int plus)
u64 cookie, struct page **pages, unsigned int count, bool plus)
{
struct inode *dir = d_inode(dentry);
struct nfs_readdirargs arg = {

View File

@ -879,7 +879,7 @@ int nfs_show_stats(struct seq_file *m, struct dentry *root)
if (nfss->options & NFS_OPTION_FSCACHE) {
seq_printf(m, "\n\tfsc:\t");
for (i = 0; i < __NFSIOS_FSCACHEMAX; i++)
seq_printf(m, "%Lu ", totals.bytes[i]);
seq_printf(m, "%Lu ", totals.fscache[i]);
}
#endif
seq_printf(m, "\n");
@ -2339,6 +2339,7 @@ void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info)
*/
sb->s_flags |= MS_POSIXACL;
sb->s_time_gran = 1;
sb->s_export_op = &nfs_export_ops;
}
nfs_initialise_sb(sb);
@ -2360,6 +2361,7 @@ static void nfs_clone_super(struct super_block *sb,
sb->s_xattr = old_sb->s_xattr;
sb->s_op = old_sb->s_op;
sb->s_time_gran = 1;
sb->s_export_op = old_sb->s_export_op;
if (server->nfs_client->rpc_ops->version != 2) {
/* The VFS shouldn't apply the umask to mode bits. We will do

View File

@ -288,6 +288,19 @@ static void nfs_async_rename_release(void *calldata)
if (d_really_is_positive(data->old_dentry))
nfs_mark_for_revalidate(d_inode(data->old_dentry));
/* The result of the rename is unknown. Play it safe by
* forcing a new lookup */
if (data->cancelled) {
spin_lock(&data->old_dir->i_lock);
nfs_force_lookup_revalidate(data->old_dir);
spin_unlock(&data->old_dir->i_lock);
if (data->new_dir != data->old_dir) {
spin_lock(&data->new_dir->i_lock);
nfs_force_lookup_revalidate(data->new_dir);
spin_unlock(&data->new_dir->i_lock);
}
}
dput(data->old_dentry);
dput(data->new_dentry);
iput(data->old_dir);

View File

@ -40,6 +40,12 @@
#define MIN_POOL_WRITE (32)
#define MIN_POOL_COMMIT (4)
/*
 * Reference-counted completion hook: when the last reference is dropped,
 * nfs_io_completion_release() calls @complete with @data and frees the
 * object.
 */
struct nfs_io_completion {
/* callback run once on final release */
void (*complete)(void *data);
/* opaque argument passed to @complete */
void *data;
/* reference count; initialised by nfs_io_completion_init() */
struct kref refcount;
};
/*
* Local function declarations
*/
@ -108,6 +114,39 @@ static void nfs_writehdr_free(struct nfs_pgio_header *hdr)
mempool_free(hdr, nfs_wdata_mempool);
}
/*
 * Allocate an uninitialised nfs_io_completion with kmalloc().
 * Returns the kmalloc() result unchanged, so the caller must check it
 * and must call nfs_io_completion_init() before use.
 */
static struct nfs_io_completion *nfs_io_completion_alloc(gfp_t gfp_flags)
{
	struct nfs_io_completion *ioc;

	ioc = kmalloc(sizeof(*ioc), gfp_flags);
	return ioc;
}
/*
 * Prepare @ioc for use: initialise its reference count with kref_init()
 * and record the callback and argument to be used on final release.
 */
static void nfs_io_completion_init(struct nfs_io_completion *ioc,
		void (*complete)(void *), void *data)
{
	kref_init(&ioc->refcount);
	ioc->data = data;
	ioc->complete = complete;
}
/*
 * kref release callback: run the stored completion callback with its
 * argument, then free the nfs_io_completion that embeds @kref.
 */
static void nfs_io_completion_release(struct kref *kref)
{
	struct nfs_io_completion *ioc;

	ioc = container_of(kref, struct nfs_io_completion, refcount);
	ioc->complete(ioc->data);
	kfree(ioc);
}
/*
 * Take a reference on @ioc. A NULL @ioc is accepted and ignored.
 */
static void nfs_io_completion_get(struct nfs_io_completion *ioc)
{
	if (ioc == NULL)
		return;
	kref_get(&ioc->refcount);
}
/*
 * Drop a reference on @ioc; nfs_io_completion_release() runs when the
 * count reaches zero. A NULL @ioc is accepted and ignored.
 */
static void nfs_io_completion_put(struct nfs_io_completion *ioc)
{
	if (ioc == NULL)
		return;
	kref_put(&ioc->refcount, nfs_io_completion_release);
}
static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
{
ctx->error = error;
@ -681,18 +720,29 @@ static int nfs_writepages_callback(struct page *page, struct writeback_control *
return ret;
}
/*
 * Completion callback used by nfs_writepages(): calls nfs_commit_inode()
 * on @inode with a flags argument of 0.
 * @inode is passed as void * to match the nfs_io_completion callback type.
 * NOTE(review): flags == 0 presumably means "do not wait" - confirm
 * against nfs_commit_inode().
 */
static void nfs_io_completion_commit(void *inode)
{
nfs_commit_inode(inode, 0);
}
int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
struct inode *inode = mapping->host;
struct nfs_pageio_descriptor pgio;
struct nfs_io_completion *ioc = nfs_io_completion_alloc(GFP_NOFS);
int err;
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
if (ioc)
nfs_io_completion_init(ioc, nfs_io_completion_commit, inode);
nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false,
&nfs_async_write_completion_ops);
pgio.pg_io_completion = ioc;
err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
nfs_pageio_complete(&pgio);
nfs_io_completion_put(ioc);
if (err < 0)
goto out_err;
@ -940,6 +990,11 @@ int nfs_write_need_commit(struct nfs_pgio_header *hdr)
return hdr->verf.committed != NFS_FILE_SYNC;
}
/*
 * init_hdr hook for nfs_async_write_completion_ops: take a reference on
 * the header's io_completion (ignored when it is NULL). The matching
 * put is issued in nfs_write_completion().
 */
static void nfs_async_write_init(struct nfs_pgio_header *hdr)
{
nfs_io_completion_get(hdr->io_completion);
}
static void nfs_write_completion(struct nfs_pgio_header *hdr)
{
struct nfs_commit_info cinfo;
@ -973,6 +1028,7 @@ next:
nfs_release_request(req);
}
out:
nfs_io_completion_put(hdr->io_completion);
hdr->release(hdr);
}
@ -1378,6 +1434,7 @@ static void nfs_async_write_reschedule_io(struct nfs_pgio_header *hdr)
}
static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = {
.init_hdr = nfs_async_write_init,
.error_cleanup = nfs_async_write_error,
.completion = nfs_write_completion,
.reschedule_io = nfs_async_write_reschedule_io,
@ -1884,7 +1941,7 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
/* Don't commit yet if this is a non-blocking flush and there
* are a lot of outstanding writes for this mapping.
*/
if (nfsi->commit_info.ncommit <= (nfsi->nrequests >> 1))
if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))
goto out_mark_dirty;
/* don't wait for the COMMIT response */

View File

@ -479,6 +479,7 @@ enum {
NFSPROC4_CLNT_ACCESS,
NFSPROC4_CLNT_GETATTR,
NFSPROC4_CLNT_LOOKUP,
NFSPROC4_CLNT_LOOKUPP,
NFSPROC4_CLNT_LOOKUP_ROOT,
NFSPROC4_CLNT_REMOVE,
NFSPROC4_CLNT_RENAME,

View File

@ -332,6 +332,7 @@ extern void nfs_zap_caches(struct inode *);
extern void nfs_invalidate_atime(struct inode *);
extern struct inode *nfs_fhget(struct super_block *, struct nfs_fh *,
struct nfs_fattr *, struct nfs4_label *);
struct inode *nfs_ilookup(struct super_block *sb, struct nfs_fattr *, struct nfs_fh *);
extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *);
extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr);
extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr);

View File

@ -42,6 +42,7 @@ struct nfs_client {
#define NFS_CS_MIGRATION 2 /* - transparent state migr */
#define NFS_CS_INFINITE_SLOTS 3 /* - don't limit TCP slots */
#define NFS_CS_NO_RETRANS_TIMEOUT 4 /* - Disable retransmit timeouts */
#define NFS_CS_TSM_POSSIBLE 5 /* - Maybe state migration */
struct sockaddr_storage cl_addr; /* server identifier */
size_t cl_addrlen;
char * cl_hostname; /* hostname of server */
@ -210,6 +211,7 @@ struct nfs_server {
unsigned long mig_status;
#define NFS_MIG_IN_TRANSITION (1)
#define NFS_MIG_FAILED (2)
#define NFS_MIG_TSM_POSSIBLE (3)
void (*destroy)(struct nfs_server *);

View File

@ -33,6 +33,8 @@ enum {
PG_UPTODATE, /* page group sync bit in read path */
PG_WB_END, /* page group sync bit in write path */
PG_REMOVE, /* page group sync bit in write path */
PG_CONTENDED1, /* Is someone waiting for a lock? */
PG_CONTENDED2, /* Is someone waiting for a lock? */
};
struct nfs_inode;
@ -93,8 +95,8 @@ struct nfs_pageio_descriptor {
const struct rpc_call_ops *pg_rpc_callops;
const struct nfs_pgio_completion_ops *pg_completion_ops;
struct pnfs_layout_segment *pg_lseg;
struct nfs_io_completion *pg_io_completion;
struct nfs_direct_req *pg_dreq;
void *pg_layout_private;
unsigned int pg_bsize; /* default bsize for mirrors */
u32 pg_mirror_count;

View File

@ -878,7 +878,7 @@ struct nfs3_readdirargs {
struct nfs_fh * fh;
__u64 cookie;
__be32 verf[2];
int plus;
bool plus;
unsigned int count;
struct page ** pages;
};
@ -909,7 +909,7 @@ struct nfs3_linkres {
struct nfs3_readdirres {
struct nfs_fattr * dir_attr;
__be32 * verf;
int plus;
bool plus;
};
struct nfs3_getaclres {
@ -1012,7 +1012,6 @@ struct nfs4_link_res {
struct nfs_fattr * dir_attr;
};
struct nfs4_lookup_arg {
struct nfs4_sequence_args seq_args;
const struct nfs_fh * dir_fh;
@ -1028,6 +1027,20 @@ struct nfs4_lookup_res {
struct nfs4_label *label;
};
/* Arguments consumed by nfs4_xdr_enc_lookupp() */
struct nfs4_lookupp_arg {
/* passed to encode_sequence() */
struct nfs4_sequence_args seq_args;
/* file handle passed to encode_putfh() before LOOKUPP */
const struct nfs_fh *fh;
/* attribute bitmask passed to encode_getfattr() */
const u32 *bitmask;
};
/* Results filled in by nfs4_xdr_dec_lookupp() */
struct nfs4_lookupp_res {
/* passed to decode_sequence() */
struct nfs4_sequence_res seq_res;
/* server passed through to decode_getfattr_label() */
const struct nfs_server *server;
/* attributes decoded from the GETATTR result */
struct nfs_fattr *fattr;
/* file handle decoded from the GETFH result */
struct nfs_fh *fh;
/* label passed to decode_getfattr_label() alongside @fattr */
struct nfs4_label *label;
};
struct nfs4_lookup_root_arg {
struct nfs4_sequence_args seq_args;
const u32 * bitmask;
@ -1053,7 +1066,7 @@ struct nfs4_readdir_arg {
struct page ** pages; /* zero-copy data */
unsigned int pgbase; /* zero-copy data */
const u32 * bitmask;
int plus;
bool plus;
};
struct nfs4_readdir_res {
@ -1422,6 +1435,7 @@ enum {
NFS_IOHDR_STAT,
};
struct nfs_io_completion;
struct nfs_pgio_header {
struct inode *inode;
struct rpc_cred *cred;
@ -1435,8 +1449,8 @@ struct nfs_pgio_header {
void (*release) (struct nfs_pgio_header *hdr);
const struct nfs_pgio_completion_ops *completion_ops;
const struct nfs_rw_ops *rw_ops;
struct nfs_io_completion *io_completion;
struct nfs_direct_req *dreq;
void *layout_private;
spinlock_t lock;
/* fields protected by lock */
int pnfs_error;
@ -1533,6 +1547,7 @@ struct nfs_renamedata {
struct nfs_fattr new_fattr;
void (*complete)(struct rpc_task *, struct nfs_renamedata *);
long timeout;
bool cancelled;
};
struct nfs_access_entry;
@ -1567,6 +1582,8 @@ struct nfs_rpc_ops {
int (*lookup) (struct inode *, const struct qstr *,
struct nfs_fh *, struct nfs_fattr *,
struct nfs4_label *);
int (*lookupp) (struct inode *, struct nfs_fh *,
struct nfs_fattr *, struct nfs4_label *);
int (*access) (struct inode *, struct nfs_access_entry *);
int (*readlink)(struct inode *, struct page *, unsigned int,
unsigned int);
@ -1585,7 +1602,7 @@ struct nfs_rpc_ops {
int (*mkdir) (struct inode *, struct dentry *, struct iattr *);
int (*rmdir) (struct inode *, const struct qstr *);
int (*readdir) (struct dentry *, struct rpc_cred *,
u64, struct page **, unsigned int, int);
u64, struct page **, unsigned int, bool);
int (*mknod) (struct inode *, struct dentry *, struct iattr *,
dev_t);
int (*statfs) (struct nfs_server *, struct nfs_fh *,
@ -1595,7 +1612,7 @@ struct nfs_rpc_ops {
int (*pathconf) (struct nfs_server *, struct nfs_fh *,
struct nfs_pathconf *);
int (*set_capabilities)(struct nfs_server *, struct nfs_fh *);
int (*decode_dirent)(struct xdr_stream *, struct nfs_entry *, int);
int (*decode_dirent)(struct xdr_stream *, struct nfs_entry *, bool);
int (*pgio_rpc_prepare)(struct rpc_task *,
struct nfs_pgio_header *);
void (*read_setup)(struct nfs_pgio_header *, struct rpc_message *);

View File

@ -34,6 +34,7 @@
* WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
#include <crypto/algapi.h>
#include <crypto/hash.h>
#include <crypto/skcipher.h>
#include <linux/err.h>
@ -927,7 +928,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
if (ret)
goto out_err;
if (memcmp(pkt_hmac, our_hmac, kctx->gk5e->cksumlength) != 0) {
if (crypto_memneq(pkt_hmac, our_hmac, kctx->gk5e->cksumlength) != 0) {
ret = GSS_S_BAD_SIG;
goto out_err;
}

View File

@ -1047,13 +1047,15 @@ out:
return ret;
}
static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt, gfp_t gfp_flags)
static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt)
{
struct rpc_rqst *req = ERR_PTR(-EAGAIN);
if (!atomic_add_unless(&xprt->num_reqs, 1, xprt->max_reqs))
goto out;
req = kzalloc(sizeof(struct rpc_rqst), gfp_flags);
spin_unlock(&xprt->reserve_lock);
req = kzalloc(sizeof(struct rpc_rqst), GFP_NOFS);
spin_lock(&xprt->reserve_lock);
if (req != NULL)
goto out;
atomic_dec(&xprt->num_reqs);
@ -1081,7 +1083,7 @@ void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
list_del(&req->rq_list);
goto out_init_req;
}
req = xprt_dynamic_alloc_slot(xprt, GFP_NOWAIT|__GFP_NOWARN);
req = xprt_dynamic_alloc_slot(xprt);
if (!IS_ERR(req))
goto out_init_req;
switch (PTR_ERR(req)) {

View File

@ -91,7 +91,7 @@ __fmr_unmap(struct rpcrdma_mw *mw)
list_add(&mw->fmr.fm_mr->list, &l);
rc = ib_unmap_fmr(&l);
list_del_init(&mw->fmr.fm_mr->list);
list_del(&mw->fmr.fm_mr->list);
return rc;
}
@ -213,13 +213,11 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
break;
}
mw->mw_nents = i;
mw->mw_dir = rpcrdma_data_dir(writing);
if (i == 0)
goto out_dmamap_err;
if (!ib_dma_map_sg(r_xprt->rx_ia.ri_device,
mw->mw_sg, mw->mw_nents, mw->mw_dir))
mw->mw_nents = ib_dma_map_sg(r_xprt->rx_ia.ri_device,
mw->mw_sg, i, mw->mw_dir);
if (!mw->mw_nents)
goto out_dmamap_err;
for (i = 0, dma_pages = mw->fmr.fm_physaddrs; i < mw->mw_nents; i++)
@ -237,16 +235,18 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
return mw->mw_nents;
out_dmamap_err:
pr_err("rpcrdma: failed to dma map sg %p sg_nents %u\n",
mw->mw_sg, mw->mw_nents);
rpcrdma_defer_mr_recovery(mw);
pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
mw->mw_sg, i);
rpcrdma_put_mw(r_xprt, mw);
return -EIO;
out_maperr:
pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
len, (unsigned long long)dma_pages[0],
pageoff, mw->mw_nents, rc);
rpcrdma_defer_mr_recovery(mw);
ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
mw->mw_sg, mw->mw_nents, mw->mw_dir);
rpcrdma_put_mw(r_xprt, mw);
return -EIO;
}
@ -255,24 +255,26 @@ out_maperr:
* Sleeps until it is safe for the host CPU to access the
* previously mapped memory regions.
*
* Caller ensures that req->rl_registered is not empty.
* Caller ensures that @mws is not empty before the call. This
* function empties the list.
*/
static void
fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
{
struct rpcrdma_mw *mw, *tmp;
struct rpcrdma_mw *mw;
LIST_HEAD(unmap_list);
int rc;
dprintk("RPC: %s: req %p\n", __func__, req);
/* ORDER: Invalidate all of the req's MRs first
*
* ib_unmap_fmr() is slow, so use a single call instead
* of one call per mapped FMR.
*/
list_for_each_entry(mw, &req->rl_registered, mw_list)
list_for_each_entry(mw, mws, mw_list) {
dprintk("RPC: %s: unmapping fmr %p\n",
__func__, &mw->fmr);
list_add_tail(&mw->fmr.fm_mr->list, &unmap_list);
}
r_xprt->rx_stats.local_inv_needed++;
rc = ib_unmap_fmr(&unmap_list);
if (rc)
@ -281,9 +283,11 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
/* ORDER: Now DMA unmap all of the req's MRs, and return
* them to the free MW list.
*/
list_for_each_entry_safe(mw, tmp, &req->rl_registered, mw_list) {
list_del_init(&mw->mw_list);
list_del_init(&mw->fmr.fm_mr->list);
while (!list_empty(mws)) {
mw = rpcrdma_pop_mw(mws);
dprintk("RPC: %s: DMA unmapping fmr %p\n",
__func__, &mw->fmr);
list_del(&mw->fmr.fm_mr->list);
ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
mw->mw_sg, mw->mw_nents, mw->mw_dir);
rpcrdma_put_mw(r_xprt, mw);
@ -294,8 +298,9 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
out_reset:
pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc);
list_for_each_entry_safe(mw, tmp, &req->rl_registered, mw_list) {
list_del_init(&mw->fmr.fm_mr->list);
while (!list_empty(mws)) {
mw = rpcrdma_pop_mw(mws);
list_del(&mw->fmr.fm_mr->list);
fmr_op_recover_mr(mw);
}
}

View File

@ -277,7 +277,7 @@ __frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr)
}
/**
* frwr_wc_fastreg - Invoked by RDMA provider for each polled FastReg WC
* frwr_wc_fastreg - Invoked by RDMA provider for a flushed FastReg WC
* @cq: completion queue (ignored)
* @wc: completed WR
*
@ -298,7 +298,7 @@ frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
}
/**
* frwr_wc_localinv - Invoked by RDMA provider for each polled LocalInv WC
* frwr_wc_localinv - Invoked by RDMA provider for a flushed LocalInv WC
* @cq: completion queue (ignored)
* @wc: completed WR
*
@ -319,7 +319,7 @@ frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
}
/**
* frwr_wc_localinv - Invoked by RDMA provider for each polled LocalInv WC
* frwr_wc_localinv_wake - Invoked by RDMA provider for a signaled LocalInv WC
* @cq: completion queue (ignored)
* @wc: completed WR
*
@ -355,7 +355,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
struct ib_mr *mr;
struct ib_reg_wr *reg_wr;
struct ib_send_wr *bad_wr;
int rc, i, n, dma_nents;
int rc, i, n;
u8 key;
mw = NULL;
@ -391,14 +391,10 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
break;
}
mw->mw_nents = i;
mw->mw_dir = rpcrdma_data_dir(writing);
if (i == 0)
goto out_dmamap_err;
dma_nents = ib_dma_map_sg(ia->ri_device,
mw->mw_sg, mw->mw_nents, mw->mw_dir);
if (!dma_nents)
mw->mw_nents = ib_dma_map_sg(ia->ri_device, mw->mw_sg, i, mw->mw_dir);
if (!mw->mw_nents)
goto out_dmamap_err;
n = ib_map_mr_sg(mr, mw->mw_sg, mw->mw_nents, NULL, PAGE_SIZE);
@ -436,13 +432,14 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
return mw->mw_nents;
out_dmamap_err:
pr_err("rpcrdma: failed to dma map sg %p sg_nents %u\n",
mw->mw_sg, mw->mw_nents);
rpcrdma_defer_mr_recovery(mw);
pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
mw->mw_sg, i);
frmr->fr_state = FRMR_IS_INVALID;
rpcrdma_put_mw(r_xprt, mw);
return -EIO;
out_mapmr_err:
pr_err("rpcrdma: failed to map mr %p (%u/%u)\n",
pr_err("rpcrdma: failed to map mr %p (%d/%d)\n",
frmr->fr_mr, n, mw->mw_nents);
rpcrdma_defer_mr_recovery(mw);
return -EIO;
@ -458,21 +455,19 @@ out_senderr:
* Sleeps until it is safe for the host CPU to access the
* previously mapped memory regions.
*
* Caller ensures that req->rl_registered is not empty.
* Caller ensures that @mws is not empty before the call. This
* function empties the list.
*/
static void
frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
{
struct ib_send_wr *first, **prev, *last, *bad_wr;
struct rpcrdma_rep *rep = req->rl_reply;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
struct rpcrdma_frmr *f;
struct rpcrdma_mw *mw;
int count, rc;
dprintk("RPC: %s: req %p\n", __func__, req);
/* ORDER: Invalidate all of the req's MRs first
/* ORDER: Invalidate all of the MRs first
*
* Chain the LOCAL_INV Work Requests and post them with
* a single ib_post_send() call.
@ -480,11 +475,10 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
f = NULL;
count = 0;
prev = &first;
list_for_each_entry(mw, &req->rl_registered, mw_list) {
list_for_each_entry(mw, mws, mw_list) {
mw->frmr.fr_state = FRMR_IS_INVALID;
if ((rep->rr_wc_flags & IB_WC_WITH_INVALIDATE) &&
(mw->mw_handle == rep->rr_inv_rkey))
if (mw->mw_flags & RPCRDMA_MW_F_RI)
continue;
f = &mw->frmr;
@ -524,18 +518,19 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
* unless ri_id->qp is a valid pointer.
*/
r_xprt->rx_stats.local_inv_needed++;
bad_wr = NULL;
rc = ib_post_send(ia->ri_id->qp, first, &bad_wr);
if (bad_wr != first)
wait_for_completion(&f->fr_linv_done);
if (rc)
goto reset_mrs;
wait_for_completion(&f->fr_linv_done);
/* ORDER: Now DMA unmap all of the req's MRs, and return
/* ORDER: Now DMA unmap all of the MRs, and return
* them to the free MW list.
*/
unmap:
while (!list_empty(&req->rl_registered)) {
mw = rpcrdma_pop_mw(&req->rl_registered);
while (!list_empty(mws)) {
mw = rpcrdma_pop_mw(mws);
dprintk("RPC: %s: DMA unmapping frmr %p\n",
__func__, &mw->frmr);
ib_dma_unmap_sg(ia->ri_device,
@ -546,17 +541,19 @@ unmap:
reset_mrs:
pr_err("rpcrdma: FRMR invalidate ib_post_send returned %i\n", rc);
rdma_disconnect(ia->ri_id);
/* Find and reset the MRs in the LOCAL_INV WRs that did not
* get posted. This is synchronous, and slow.
* get posted.
*/
list_for_each_entry(mw, &req->rl_registered, mw_list) {
f = &mw->frmr;
if (mw->mw_handle == bad_wr->ex.invalidate_rkey) {
__frwr_reset_mr(ia, mw);
bad_wr = bad_wr->next;
}
rpcrdma_init_cqcount(&r_xprt->rx_ep, -count);
while (bad_wr) {
f = container_of(bad_wr, struct rpcrdma_frmr,
fr_invwr);
mw = container_of(f, struct rpcrdma_mw, frmr);
__frwr_reset_mr(ia, mw);
bad_wr = bad_wr->next;
}
goto unmap;
}

View File

@ -141,7 +141,7 @@ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
if (xdr->page_len) {
remaining = xdr->page_len;
offset = xdr->page_base & ~PAGE_MASK;
offset = offset_in_page(xdr->page_base);
count = 0;
while (remaining) {
remaining -= min_t(unsigned int,
@ -222,7 +222,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
len = xdrbuf->page_len;
ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT);
page_base = xdrbuf->page_base & ~PAGE_MASK;
page_base = offset_in_page(xdrbuf->page_base);
p = 0;
while (len && n < RPCRDMA_MAX_SEGS) {
if (!ppages[p]) {
@ -540,7 +540,7 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
goto out;
page = virt_to_page(xdr->tail[0].iov_base);
page_base = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK;
page_base = offset_in_page(xdr->tail[0].iov_base);
/* If the content in the page list is an odd length,
* xdr_write_pages() has added a pad at the beginning
@ -557,7 +557,7 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
*/
if (xdr->page_len) {
ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
page_base = xdr->page_base & ~PAGE_MASK;
page_base = offset_in_page(xdr->page_base);
remaining = xdr->page_len;
while (remaining) {
sge_no++;
@ -587,7 +587,7 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
*/
if (xdr->tail[0].iov_len) {
page = virt_to_page(xdr->tail[0].iov_base);
page_base = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK;
page_base = offset_in_page(xdr->tail[0].iov_base);
len = xdr->tail[0].iov_len;
map_tail:
@ -734,6 +734,9 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
rpclen = 0;
}
req->rl_xid = rqst->rq_xid;
rpcrdma_insert_req(&r_xprt->rx_buf, req);
/* This implementation supports the following combinations
* of chunk lists in one RPC-over-RDMA Call message:
*
@ -875,9 +878,9 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
srcp += curlen;
copy_len -= curlen;
page_base = rqst->rq_rcv_buf.page_base;
ppages = rqst->rq_rcv_buf.pages + (page_base >> PAGE_SHIFT);
page_base &= ~PAGE_MASK;
ppages = rqst->rq_rcv_buf.pages +
(rqst->rq_rcv_buf.page_base >> PAGE_SHIFT);
page_base = offset_in_page(rqst->rq_rcv_buf.page_base);
fixup_copy_count = 0;
if (copy_len && rqst->rq_rcv_buf.page_len) {
int pagelist_len;
@ -928,6 +931,24 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
return fixup_copy_count;
}
/* Caller must guarantee @rep remains stable during this call.
*/
static void
rpcrdma_mark_remote_invalidation(struct list_head *mws,
struct rpcrdma_rep *rep)
{
struct rpcrdma_mw *mw;
if (!(rep->rr_wc_flags & IB_WC_WITH_INVALIDATE))
return;
list_for_each_entry(mw, mws, mw_list)
if (mw->mw_handle == rep->rr_inv_rkey) {
mw->mw_flags = RPCRDMA_MW_F_RI;
break; /* only one invalidated MR per RPC */
}
}
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
/* By convention, backchannel calls arrive via rdma_msg type
* messages, and never populate the chunk lists. This makes
@ -969,14 +990,16 @@ rpcrdma_reply_handler(struct work_struct *work)
{
struct rpcrdma_rep *rep =
container_of(work, struct rpcrdma_rep, rr_work);
struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpc_xprt *xprt = &r_xprt->rx_xprt;
struct rpcrdma_msg *headerp;
struct rpcrdma_req *req;
struct rpc_rqst *rqst;
struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
struct rpc_xprt *xprt = &r_xprt->rx_xprt;
__be32 *iptr;
int rdmalen, status, rmerr;
unsigned long cwnd;
struct list_head mws;
dprintk("RPC: %s: incoming rep %p\n", __func__, rep);
@ -994,27 +1017,45 @@ rpcrdma_reply_handler(struct work_struct *work)
/* Match incoming rpcrdma_rep to an rpcrdma_req to
* get context for handling any incoming chunks.
*/
spin_lock_bh(&xprt->transport_lock);
rqst = xprt_lookup_rqst(xprt, headerp->rm_xid);
if (!rqst)
spin_lock(&buf->rb_lock);
req = rpcrdma_lookup_req_locked(&r_xprt->rx_buf,
headerp->rm_xid);
if (!req)
goto out_nomatch;
req = rpcr_to_rdmar(rqst);
if (req->rl_reply)
goto out_duplicate;
/* Sanity checking has passed. We are now committed
* to complete this transaction.
list_replace_init(&req->rl_registered, &mws);
rpcrdma_mark_remote_invalidation(&mws, rep);
/* Avoid races with signals and duplicate replies
* by marking this req as matched.
*/
list_del_init(&rqst->rq_list);
spin_unlock_bh(&xprt->transport_lock);
req->rl_reply = rep;
spin_unlock(&buf->rb_lock);
dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n",
__func__, rep, req, be32_to_cpu(headerp->rm_xid));
/* from here on, the reply is no longer an orphan */
req->rl_reply = rep;
xprt->reestablish_timeout = 0;
/* Invalidate and unmap the data payloads before waking the
* waiting application. This guarantees the memory regions
* are properly fenced from the server before the application
* accesses the data. It also ensures proper send flow control:
* waking the next RPC waits until this RPC has relinquished
* all its Send Queue entries.
*/
if (!list_empty(&mws))
r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, &mws);
/* Perform XID lookup, reconstruction of the RPC reply, and
* RPC completion while holding the transport lock to ensure
* the rep, rqst, and rq_task pointers remain stable.
*/
spin_lock_bh(&xprt->transport_lock);
rqst = xprt_lookup_rqst(xprt, headerp->rm_xid);
if (!rqst)
goto out_norqst;
xprt->reestablish_timeout = 0;
if (headerp->rm_vers != rpcrdma_version)
goto out_badversion;
@ -1024,12 +1065,9 @@ rpcrdma_reply_handler(struct work_struct *work)
case rdma_msg:
/* never expect read chunks */
/* never expect reply chunks (two ways to check) */
/* never expect write chunks without having offered RDMA */
if (headerp->rm_body.rm_chunks[0] != xdr_zero ||
(headerp->rm_body.rm_chunks[1] == xdr_zero &&
headerp->rm_body.rm_chunks[2] != xdr_zero) ||
(headerp->rm_body.rm_chunks[1] != xdr_zero &&
list_empty(&req->rl_registered)))
headerp->rm_body.rm_chunks[2] != xdr_zero))
goto badheader;
if (headerp->rm_body.rm_chunks[1] != xdr_zero) {
/* count any expected write chunks in read reply */
@ -1066,8 +1104,7 @@ rpcrdma_reply_handler(struct work_struct *work)
/* never expect read or write chunks, always reply chunks */
if (headerp->rm_body.rm_chunks[0] != xdr_zero ||
headerp->rm_body.rm_chunks[1] != xdr_zero ||
headerp->rm_body.rm_chunks[2] != xdr_one ||
list_empty(&req->rl_registered))
headerp->rm_body.rm_chunks[2] != xdr_one)
goto badheader;
iptr = (__be32 *)((unsigned char *)headerp +
RPCRDMA_HDRLEN_MIN);
@ -1093,17 +1130,6 @@ badheader:
}
out:
/* Invalidate and flush the data payloads before waking the
* waiting application. This guarantees the memory region is
* properly fenced from the server before the application
* accesses the data. It also ensures proper send flow
* control: waking the next RPC waits until this RPC has
* relinquished all its Send Queue entries.
*/
if (!list_empty(&req->rl_registered))
r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, req);
spin_lock_bh(&xprt->transport_lock);
cwnd = xprt->cwnd;
xprt->cwnd = atomic_read(&r_xprt->rx_buf.rb_credits) << RPC_CWNDSHIFT;
if (xprt->cwnd > cwnd)
@ -1112,7 +1138,7 @@ out:
xprt_complete_rqst(rqst->rq_task, status);
spin_unlock_bh(&xprt->transport_lock);
dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n",
__func__, xprt, rqst, status);
__func__, xprt, rqst, status);
return;
out_badstatus:
@ -1161,26 +1187,37 @@ out_rdmaerr:
r_xprt->rx_stats.bad_reply_count++;
goto out;
/* If no pending RPC transaction was matched, post a replacement
* receive buffer before returning.
/* The req was still available, but by the time the transport_lock
* was acquired, the rqst and task had been released. Thus the RPC
* has already been terminated.
*/
out_norqst:
spin_unlock_bh(&xprt->transport_lock);
rpcrdma_buffer_put(req);
dprintk("RPC: %s: race, no rqst left for req %p\n",
__func__, req);
return;
out_shortreply:
dprintk("RPC: %s: short/invalid reply\n", __func__);
goto repost;
out_nomatch:
spin_unlock_bh(&xprt->transport_lock);
spin_unlock(&buf->rb_lock);
dprintk("RPC: %s: no match for incoming xid 0x%08x len %d\n",
__func__, be32_to_cpu(headerp->rm_xid),
rep->rr_len);
goto repost;
out_duplicate:
spin_unlock_bh(&xprt->transport_lock);
spin_unlock(&buf->rb_lock);
dprintk("RPC: %s: "
"duplicate reply %p to RPC request %p: xid 0x%08x\n",
__func__, rep, req, be32_to_cpu(headerp->rm_xid));
/* If no pending RPC transaction was matched, post a replacement
* receive buffer before returning.
*/
repost:
r_xprt->rx_stats.bad_reply_count++;
if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep))

View File

@ -684,7 +684,8 @@ xprt_rdma_free(struct rpc_task *task)
dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply);
if (unlikely(!list_empty(&req->rl_registered)))
rpcrdma_remove_req(&r_xprt->rx_buf, req);
if (!list_empty(&req->rl_registered))
ia->ri_ops->ro_unmap_safe(r_xprt, req, !RPC_IS_ASYNC(task));
rpcrdma_unmap_sges(ia, req);
rpcrdma_buffer_put(req);

View File

@ -243,8 +243,6 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr;
#endif
struct ib_qp_attr *attr = &ia->ri_qp_attr;
struct ib_qp_init_attr *iattr = &ia->ri_qp_init_attr;
int connstate = 0;
switch (event->event) {
@ -267,7 +265,8 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
break;
case RDMA_CM_EVENT_DEVICE_REMOVAL:
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
pr_info("rpcrdma: removing device for %pIS:%u\n",
pr_info("rpcrdma: removing device %s for %pIS:%u\n",
ia->ri_device->name,
sap, rpc_get_port(sap));
#endif
set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags);
@ -282,13 +281,6 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
return 1;
case RDMA_CM_EVENT_ESTABLISHED:
connstate = 1;
ib_query_qp(ia->ri_id->qp, attr,
IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
iattr);
dprintk("RPC: %s: %d responder resources"
" (%d initiator)\n",
__func__, attr->max_dest_rd_atomic,
attr->max_rd_atomic);
rpcrdma_update_connect_private(xprt, &event->param.conn);
goto connected;
case RDMA_CM_EVENT_CONNECT_ERROR:
@ -298,11 +290,9 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
connstate = -ENETDOWN;
goto connected;
case RDMA_CM_EVENT_REJECTED:
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
pr_info("rpcrdma: connection to %pIS:%u on %s rejected: %s\n",
sap, rpc_get_port(sap), ia->ri_device->name,
dprintk("rpcrdma: connection to %pIS:%u rejected: %s\n",
sap, rpc_get_port(sap),
rdma_reject_msg(id, event->status));
#endif
connstate = -ECONNREFUSED;
if (event->status == IB_CM_REJ_STALE_CONN)
connstate = -EAGAIN;
@ -310,37 +300,19 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
case RDMA_CM_EVENT_DISCONNECTED:
connstate = -ECONNABORTED;
connected:
dprintk("RPC: %s: %sconnected\n",
__func__, connstate > 0 ? "" : "dis");
atomic_set(&xprt->rx_buf.rb_credits, 1);
ep->rep_connected = connstate;
rpcrdma_conn_func(ep);
wake_up_all(&ep->rep_connect_wait);
/*FALLTHROUGH*/
default:
dprintk("RPC: %s: %pIS:%u (ep 0x%p): %s\n",
__func__, sap, rpc_get_port(sap), ep,
rdma_event_msg(event->event));
dprintk("RPC: %s: %pIS:%u on %s/%s (ep 0x%p): %s\n",
__func__, sap, rpc_get_port(sap),
ia->ri_device->name, ia->ri_ops->ro_displayname,
ep, rdma_event_msg(event->event));
break;
}
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
if (connstate == 1) {
int ird = attr->max_dest_rd_atomic;
int tird = ep->rep_remote_cma.responder_resources;
pr_info("rpcrdma: connection to %pIS:%u on %s, memreg '%s', %d credits, %d responders%s\n",
sap, rpc_get_port(sap),
ia->ri_device->name,
ia->ri_ops->ro_displayname,
xprt->rx_buf.rb_max_requests,
ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
} else if (connstate < 0) {
pr_info("rpcrdma: connection to %pIS:%u closed (%d)\n",
sap, rpc_get_port(sap), connstate);
}
#endif
return 0;
}
@ -971,7 +943,6 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
if (req == NULL)
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&req->rl_free);
spin_lock(&buffer->rb_reqslock);
list_add(&req->rl_all, &buffer->rb_allreqs);
spin_unlock(&buffer->rb_reqslock);
@ -1033,6 +1004,7 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
spin_lock_init(&buf->rb_recovery_lock);
INIT_LIST_HEAD(&buf->rb_mws);
INIT_LIST_HEAD(&buf->rb_all);
INIT_LIST_HEAD(&buf->rb_pending);
INIT_LIST_HEAD(&buf->rb_stale_mrs);
INIT_DELAYED_WORK(&buf->rb_refresh_worker,
rpcrdma_mr_refresh_worker);
@ -1055,7 +1027,7 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
goto out;
}
req->rl_backchannel = false;
list_add(&req->rl_free, &buf->rb_send_bufs);
list_add(&req->rl_list, &buf->rb_send_bufs);
}
INIT_LIST_HEAD(&buf->rb_recv_bufs);
@ -1084,8 +1056,8 @@ rpcrdma_buffer_get_req_locked(struct rpcrdma_buffer *buf)
struct rpcrdma_req *req;
req = list_first_entry(&buf->rb_send_bufs,
struct rpcrdma_req, rl_free);
list_del(&req->rl_free);
struct rpcrdma_req, rl_list);
list_del_init(&req->rl_list);
return req;
}
@ -1187,6 +1159,7 @@ rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt)
if (!mw)
goto out_nomws;
mw->mw_flags = 0;
return mw;
out_nomws:
@ -1267,7 +1240,7 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
spin_lock(&buffers->rb_lock);
buffers->rb_send_count--;
list_add_tail(&req->rl_free, &buffers->rb_send_bufs);
list_add_tail(&req->rl_list, &buffers->rb_send_bufs);
if (rep) {
buffers->rb_recv_count--;
list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs);

View File

@ -271,6 +271,7 @@ struct rpcrdma_mw {
struct scatterlist *mw_sg;
int mw_nents;
enum dma_data_direction mw_dir;
unsigned long mw_flags;
union {
struct rpcrdma_fmr fmr;
struct rpcrdma_frmr frmr;
@ -282,6 +283,11 @@ struct rpcrdma_mw {
struct list_head mw_all;
};
/* mw_flags: flag values kept in the mw_flags field of struct rpcrdma_mw.
 *
 * NOTE(review): the meaning of "RI" is not visible here; presumably it
 * marks an MW whose rkey was remotely invalidated -- confirm against the
 * code that sets and tests this flag.
 */
enum {
RPCRDMA_MW_F_RI = 1,
};
/*
* struct rpcrdma_req -- structure central to the request/reply sequence.
*
@ -334,7 +340,8 @@ enum {
struct rpcrdma_buffer;
struct rpcrdma_req {
struct list_head rl_free;
struct list_head rl_list;
__be32 rl_xid;
unsigned int rl_mapped_sges;
unsigned int rl_connect_cookie;
struct rpcrdma_buffer *rl_buffer;
@ -396,6 +403,7 @@ struct rpcrdma_buffer {
int rb_send_count, rb_recv_count;
struct list_head rb_send_bufs;
struct list_head rb_recv_bufs;
struct list_head rb_pending;
u32 rb_max_requests;
atomic_t rb_credits; /* most recent credit grant */
@ -461,7 +469,7 @@ struct rpcrdma_memreg_ops {
struct rpcrdma_mr_seg *, int, bool,
struct rpcrdma_mw **);
void (*ro_unmap_sync)(struct rpcrdma_xprt *,
struct rpcrdma_req *);
struct list_head *);
void (*ro_unmap_safe)(struct rpcrdma_xprt *,
struct rpcrdma_req *, bool);
void (*ro_recover_mr)(struct rpcrdma_mw *);
@ -544,6 +552,34 @@ void rpcrdma_destroy_req(struct rpcrdma_req *);
int rpcrdma_buffer_create(struct rpcrdma_xprt *);
void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
/**
 * rpcrdma_insert_req - queue a request on the pending list
 * @buffers: buffer pool that owns the rb_pending list and rb_lock
 * @req: request to queue
 *
 * Under buffers->rb_lock, appends @req to the tail of
 * buffers->rb_pending. A request whose rl_list is not empty (it is
 * already linked somewhere) is left untouched, so calling this twice
 * for the same queued request does not double-add it.
 */
static inline void
rpcrdma_insert_req(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
{
	spin_lock(&buffers->rb_lock);

	/* Already linked on a list: nothing to do. */
	if (!list_empty(&req->rl_list))
		goto out_unlock;

	list_add_tail(&req->rl_list, &buffers->rb_pending);

out_unlock:
	spin_unlock(&buffers->rb_lock);
}
/**
 * rpcrdma_lookup_req_locked - find a pending request by XID
 * @buffers: buffer pool whose rb_pending list is searched
 * @xid: transaction ID to match, compared as-is against rl_xid
 *
 * Walks buffers->rb_pending in list order and returns the first
 * request whose rl_xid equals @xid. The request is not unlinked.
 *
 * This function takes no lock itself.
 * NOTE(review): presumably the caller must hold buffers->rb_lock, as
 * the "_locked" suffix suggests -- confirm against the callers.
 *
 * Returns the matching request, or NULL if none is pending.
 */
static inline struct rpcrdma_req *
rpcrdma_lookup_req_locked(struct rpcrdma_buffer *buffers, __be32 xid)
{
	struct rpcrdma_req *req;

	list_for_each_entry(req, &buffers->rb_pending, rl_list) {
		if (req->rl_xid == xid)
			return req;
	}

	return NULL;
}
/**
 * rpcrdma_remove_req - unlink a request from the list it is on
 * @buffers: buffer pool whose rb_lock protects the list
 * @req: request to unlink
 *
 * Takes buffers->rb_lock, removes @req->rl_list from whichever list it
 * is currently linked on, and drops the lock.
 *
 * NOTE(review): this uses list_del(), not list_del_init(), so rl_list
 * is not left in the "empty" state that rpcrdma_insert_req() tests
 * with list_empty(). Confirm that every caller re-links the request
 * (for example by returning it to the send buffer list) before anything
 * checks list_empty(&req->rl_list) again.
 */
static inline void
rpcrdma_remove_req(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
{
spin_lock(&buffers->rb_lock);
/* Unlink only; the entry is not re-initialised (see note above). */
list_del(&req->rl_list);
spin_unlock(&buffers->rb_lock);
}
struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *);
void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *);
struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);