NFS Client Updates for Linux 5.12
- New Features: - Support for eager writes, and the write=eager and write=wait mount options - Other Bugfixes and Cleanups: - Fix typos in some comments - Fix up fall-through warnings for Clang - Cleanups to the NFS readpage codepath - Remove FMR support in rpcrdma_convert_iovs() - Various other cleanups to xprtrdma - Fix xprtrdma pad optimization for servers that don't support RFC 8797 - Improvements to rpcrdma tracepoints - Fix up nfs4_bitmask_adjust() - Optimize sparse writes past the end of files -----BEGIN PGP SIGNATURE----- iQIzBAABCAAdFiEEnZ5MQTpR7cLU7KEp18tUv7ClQOsFAmAwOLwACgkQ18tUv7Cl QOsUfw//W2KoJ+2IQohQNFcoi+bG1OQE7jnqHtQ+tsKfpJKemcDcu8wQEAqrwALg vXioG1Ye0QU7P5PZtNxCorylqSTVGvJSIOrfa3lTdn/PDbI7NIgN52w56TzzfeXn pJ4gDwZzPwUFUblF0LBQUIhJv5IQvOXVgUsMqezbIbMXSiuLR/bjnZ96Q/woKpoL eg2IZ5EO9Jb0QjuQ1e9U303X7c2qOl1jzpxyQLQfD7ONnWBx3HnJk1l+3JJRi8JV smnae3I0L3nUZ7rBqoqsvK7YUjUchCEBvkmEMsnHT94D5tI9mxxX5OquREee6QHn NuJRSNbsIiCD3Ne27fkCut78d6SetoMko7jZ97T6smhyijtXJiLG/6dycMPV9rt/ bVIudWMm9/A9AsXyY2YP5LC6Y6W6dhQRXygUjVgEPBl6kVsb2Eca8IA9QZghF9IL +XSEulASvxo2rWPylJJ+3aLynfqoHrowVN/Tu61svDnJWTcb+FCxQ5zyLox7erEH mUhraf1D0uoX9odH1069toN6favZFE6SIDvlUk1QTOjr6p3Jxmkuyl6PNs5t66/S 550z5JVb2deIHOPQxOie7xz/Dk6dnRoaFhTNq/Ootkt9GNe0A+NqSUdoRA5XxN5m wW11ecLSZSehDksuXjyFmkHtkagLreFxLsHbVnaAtwEm7h/thRI= =Dssn -----END PGP SIGNATURE----- Merge tag 'nfs-for-5.12-1' of git://git.linux-nfs.org/projects/anna/linux-nfs Pull NFS Client Updates from Anna Schumaker: "New Features: - Support for eager writes, and the write=eager and write=wait mount options - Other Bugfixes and Cleanups: - Fix typos in some comments - Fix up fall-through warnings for Clang - Cleanups to the NFS readpage codepath - Remove FMR support in rpcrdma_convert_iovs() - Various other cleanups to xprtrdma - Fix xprtrdma pad optimization for servers that don't support RFC 8797 - Improvements to rpcrdma tracepoints - Fix up nfs4_bitmask_adjust() - Optimize sparse writes past the end of files" * tag 'nfs-for-5.12-1' of 
git://git.linux-nfs.org/projects/anna/linux-nfs: (27 commits) NFS: Support the '-owrite=' option in /proc/self/mounts and mountinfo NFS: Set the stable writes flag when initialising the super block NFS: Add mount options supporting eager writes NFS: Add support for eager writes NFS: 'flags' field should be unsigned in struct nfs_server NFS: Don't set NFS_INO_INVALID_XATTR if there is no xattr cache NFS: Always clear an invalid mapping when attempting a buffered write NFS: Optimise sparse writes past the end of file NFS: Fix documenting comment for nfs_revalidate_file_size() NFSv4: Fixes for nfs4_bitmask_adjust() xprtrdma: Clean up rpcrdma_prepare_readch() rpcrdma: Capture bytes received in Receive completion tracepoints xprtrdma: Pad optimization, revisited rpcrdma: Fix comments about reverse-direction operation xprtrdma: Refactor invocations of offset_in_page() xprtrdma: Simplify rpcrdma_convert_kvec() and frwr_map() xprtrdma: Remove FMR support in rpcrdma_convert_iovs() NFS: Add nfs_pageio_complete_read() and remove nfs_readpage_async() NFS: Call readpage_async_filler() from nfs_readpage_async() NFS: Refactor nfs_readpage() and nfs_readpage_async() to use nfs_readdesc ...
This commit is contained in:
commit
1c9077cdec
|
@ -89,7 +89,7 @@ nfs_file_release(struct inode *inode, struct file *filp)
|
|||
EXPORT_SYMBOL_GPL(nfs_file_release);
|
||||
|
||||
/**
|
||||
* nfs_revalidate_size - Revalidate the file size
|
||||
* nfs_revalidate_file_size - Revalidate the file size
|
||||
* @inode: pointer to inode struct
|
||||
* @filp: pointer to struct file
|
||||
*
|
||||
|
@ -606,8 +606,8 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
|
|||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct inode *inode = file_inode(file);
|
||||
unsigned long written = 0;
|
||||
ssize_t result;
|
||||
unsigned int mntflags = NFS_SERVER(inode)->flags;
|
||||
ssize_t result, written;
|
||||
errseq_t since;
|
||||
int error;
|
||||
|
||||
|
@ -626,13 +626,13 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
|
|||
/*
|
||||
* O_APPEND implies that we must revalidate the file length.
|
||||
*/
|
||||
if (iocb->ki_flags & IOCB_APPEND) {
|
||||
if (iocb->ki_flags & IOCB_APPEND || iocb->ki_pos > i_size_read(inode)) {
|
||||
result = nfs_revalidate_file_size(inode, file);
|
||||
if (result)
|
||||
goto out;
|
||||
}
|
||||
if (iocb->ki_pos > i_size_read(inode))
|
||||
nfs_revalidate_mapping(inode, file->f_mapping);
|
||||
|
||||
nfs_clear_invalid_mapping(file->f_mapping);
|
||||
|
||||
since = filemap_sample_wb_err(file->f_mapping);
|
||||
nfs_start_io_write(inode);
|
||||
|
@ -648,6 +648,21 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
|
|||
|
||||
written = result;
|
||||
iocb->ki_pos += written;
|
||||
|
||||
if (mntflags & NFS_MOUNT_WRITE_EAGER) {
|
||||
result = filemap_fdatawrite_range(file->f_mapping,
|
||||
iocb->ki_pos - written,
|
||||
iocb->ki_pos - 1);
|
||||
if (result < 0)
|
||||
goto out;
|
||||
}
|
||||
if (mntflags & NFS_MOUNT_WRITE_WAIT) {
|
||||
result = filemap_fdatawait_range(file->f_mapping,
|
||||
iocb->ki_pos - written,
|
||||
iocb->ki_pos - 1);
|
||||
if (result < 0)
|
||||
goto out;
|
||||
}
|
||||
result = generic_write_sync(iocb, written);
|
||||
if (result < 0)
|
||||
goto out;
|
||||
|
|
|
@ -82,6 +82,7 @@ enum nfs_param {
|
|||
Opt_v,
|
||||
Opt_vers,
|
||||
Opt_wsize,
|
||||
Opt_write,
|
||||
};
|
||||
|
||||
enum {
|
||||
|
@ -113,6 +114,19 @@ static const struct constant_table nfs_param_enums_lookupcache[] = {
|
|||
{}
|
||||
};
|
||||
|
||||
enum {
|
||||
Opt_write_lazy,
|
||||
Opt_write_eager,
|
||||
Opt_write_wait,
|
||||
};
|
||||
|
||||
static const struct constant_table nfs_param_enums_write[] = {
|
||||
{ "lazy", Opt_write_lazy },
|
||||
{ "eager", Opt_write_eager },
|
||||
{ "wait", Opt_write_wait },
|
||||
{}
|
||||
};
|
||||
|
||||
static const struct fs_parameter_spec nfs_fs_parameters[] = {
|
||||
fsparam_flag_no("ac", Opt_ac),
|
||||
fsparam_u32 ("acdirmax", Opt_acdirmax),
|
||||
|
@ -171,6 +185,7 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = {
|
|||
fsparam_flag ("v4.1", Opt_v),
|
||||
fsparam_flag ("v4.2", Opt_v),
|
||||
fsparam_string("vers", Opt_vers),
|
||||
fsparam_enum ("write", Opt_write, nfs_param_enums_write),
|
||||
fsparam_u32 ("wsize", Opt_wsize),
|
||||
{}
|
||||
};
|
||||
|
@ -770,6 +785,24 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
|
|||
goto out_invalid_value;
|
||||
}
|
||||
break;
|
||||
case Opt_write:
|
||||
switch (result.uint_32) {
|
||||
case Opt_write_lazy:
|
||||
ctx->flags &=
|
||||
~(NFS_MOUNT_WRITE_EAGER | NFS_MOUNT_WRITE_WAIT);
|
||||
break;
|
||||
case Opt_write_eager:
|
||||
ctx->flags |= NFS_MOUNT_WRITE_EAGER;
|
||||
ctx->flags &= ~NFS_MOUNT_WRITE_WAIT;
|
||||
break;
|
||||
case Opt_write_wait:
|
||||
ctx->flags |=
|
||||
NFS_MOUNT_WRITE_EAGER | NFS_MOUNT_WRITE_WAIT;
|
||||
break;
|
||||
default:
|
||||
goto out_invalid_value;
|
||||
}
|
||||
break;
|
||||
|
||||
/*
|
||||
* Special options
|
||||
|
@ -1479,6 +1512,8 @@ static int nfs_init_fs_context(struct fs_context *fc)
|
|||
ctx->selected_flavor = RPC_AUTH_MAXFLAVOR;
|
||||
ctx->minorversion = 0;
|
||||
ctx->need_mount = true;
|
||||
|
||||
fc->s_iflags |= SB_I_STABLE_WRITES;
|
||||
}
|
||||
fc->fs_private = ctx;
|
||||
fc->ops = &nfs_fs_context_ops;
|
||||
|
|
|
@ -390,10 +390,6 @@ static void nfs_readpage_from_fscache_complete(struct page *page,
|
|||
if (!error) {
|
||||
SetPageUptodate(page);
|
||||
unlock_page(page);
|
||||
} else {
|
||||
error = nfs_readpage_async(context, page->mapping->host, page);
|
||||
if (error)
|
||||
unlock_page(page);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
127
fs/nfs/inode.c
127
fs/nfs/inode.c
|
@ -195,6 +195,18 @@ bool nfs_check_cache_invalid(struct inode *inode, unsigned long flags)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(nfs_check_cache_invalid);
|
||||
|
||||
#ifdef CONFIG_NFS_V4_2
|
||||
static bool nfs_has_xattr_cache(const struct nfs_inode *nfsi)
|
||||
{
|
||||
return nfsi->xattr_cache != NULL;
|
||||
}
|
||||
#else
|
||||
static bool nfs_has_xattr_cache(const struct nfs_inode *nfsi)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags)
|
||||
{
|
||||
struct nfs_inode *nfsi = NFS_I(inode);
|
||||
|
@ -209,6 +221,8 @@ static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags)
|
|||
| NFS_INO_INVALID_XATTR);
|
||||
}
|
||||
|
||||
if (!nfs_has_xattr_cache(nfsi))
|
||||
flags &= ~NFS_INO_INVALID_XATTR;
|
||||
if (inode->i_mapping->nrpages == 0)
|
||||
flags &= ~(NFS_INO_INVALID_DATA|NFS_INO_DATA_INVAL_DEFER);
|
||||
nfsi->cache_validity |= flags;
|
||||
|
@ -1258,6 +1272,63 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map
|
|||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* nfs_clear_invalid_mapping - Conditionally clear a mapping
|
||||
* @mapping: pointer to mapping
|
||||
*
|
||||
* If the NFS_INO_INVALID_DATA inode flag is set, clear the mapping.
|
||||
*/
|
||||
int nfs_clear_invalid_mapping(struct address_space *mapping)
|
||||
{
|
||||
struct inode *inode = mapping->host;
|
||||
struct nfs_inode *nfsi = NFS_I(inode);
|
||||
unsigned long *bitlock = &nfsi->flags;
|
||||
int ret = 0;
|
||||
|
||||
/*
|
||||
* We must clear NFS_INO_INVALID_DATA first to ensure that
|
||||
* invalidations that come in while we're shooting down the mappings
|
||||
* are respected. But, that leaves a race window where one revalidator
|
||||
* can clear the flag, and then another checks it before the mapping
|
||||
* gets invalidated. Fix that by serializing access to this part of
|
||||
* the function.
|
||||
*
|
||||
* At the same time, we need to allow other tasks to see whether we
|
||||
* might be in the middle of invalidating the pages, so we only set
|
||||
* the bit lock here if it looks like we're going to be doing that.
|
||||
*/
|
||||
for (;;) {
|
||||
ret = wait_on_bit_action(bitlock, NFS_INO_INVALIDATING,
|
||||
nfs_wait_bit_killable, TASK_KILLABLE);
|
||||
if (ret)
|
||||
goto out;
|
||||
spin_lock(&inode->i_lock);
|
||||
if (test_bit(NFS_INO_INVALIDATING, bitlock)) {
|
||||
spin_unlock(&inode->i_lock);
|
||||
continue;
|
||||
}
|
||||
if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
|
||||
break;
|
||||
spin_unlock(&inode->i_lock);
|
||||
goto out;
|
||||
}
|
||||
|
||||
set_bit(NFS_INO_INVALIDATING, bitlock);
|
||||
smp_wmb();
|
||||
nfsi->cache_validity &=
|
||||
~(NFS_INO_INVALID_DATA | NFS_INO_DATA_INVAL_DEFER);
|
||||
spin_unlock(&inode->i_lock);
|
||||
trace_nfs_invalidate_mapping_enter(inode);
|
||||
ret = nfs_invalidate_mapping(inode, mapping);
|
||||
trace_nfs_invalidate_mapping_exit(inode, ret);
|
||||
|
||||
clear_bit_unlock(NFS_INO_INVALIDATING, bitlock);
|
||||
smp_mb__after_atomic();
|
||||
wake_up_bit(bitlock, NFS_INO_INVALIDATING);
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool nfs_mapping_need_revalidate_inode(struct inode *inode)
|
||||
{
|
||||
return nfs_check_cache_invalid(inode, NFS_INO_REVAL_PAGECACHE) ||
|
||||
|
@ -1290,65 +1361,19 @@ out:
|
|||
* @inode: pointer to host inode
|
||||
* @mapping: pointer to mapping
|
||||
*/
|
||||
int nfs_revalidate_mapping(struct inode *inode,
|
||||
struct address_space *mapping)
|
||||
int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
|
||||
{
|
||||
struct nfs_inode *nfsi = NFS_I(inode);
|
||||
unsigned long *bitlock = &nfsi->flags;
|
||||
int ret = 0;
|
||||
|
||||
/* swapfiles are not supposed to be shared. */
|
||||
if (IS_SWAPFILE(inode))
|
||||
goto out;
|
||||
return 0;
|
||||
|
||||
if (nfs_mapping_need_revalidate_inode(inode)) {
|
||||
ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
|
||||
int ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* We must clear NFS_INO_INVALID_DATA first to ensure that
|
||||
* invalidations that come in while we're shooting down the mappings
|
||||
* are respected. But, that leaves a race window where one revalidator
|
||||
* can clear the flag, and then another checks it before the mapping
|
||||
* gets invalidated. Fix that by serializing access to this part of
|
||||
* the function.
|
||||
*
|
||||
* At the same time, we need to allow other tasks to see whether we
|
||||
* might be in the middle of invalidating the pages, so we only set
|
||||
* the bit lock here if it looks like we're going to be doing that.
|
||||
*/
|
||||
for (;;) {
|
||||
ret = wait_on_bit_action(bitlock, NFS_INO_INVALIDATING,
|
||||
nfs_wait_bit_killable, TASK_KILLABLE);
|
||||
if (ret)
|
||||
goto out;
|
||||
spin_lock(&inode->i_lock);
|
||||
if (test_bit(NFS_INO_INVALIDATING, bitlock)) {
|
||||
spin_unlock(&inode->i_lock);
|
||||
continue;
|
||||
}
|
||||
if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
|
||||
break;
|
||||
spin_unlock(&inode->i_lock);
|
||||
goto out;
|
||||
}
|
||||
|
||||
set_bit(NFS_INO_INVALIDATING, bitlock);
|
||||
smp_wmb();
|
||||
nfsi->cache_validity &= ~(NFS_INO_INVALID_DATA|
|
||||
NFS_INO_DATA_INVAL_DEFER);
|
||||
spin_unlock(&inode->i_lock);
|
||||
trace_nfs_invalidate_mapping_enter(inode);
|
||||
ret = nfs_invalidate_mapping(inode, mapping);
|
||||
trace_nfs_invalidate_mapping_exit(inode, ret);
|
||||
|
||||
clear_bit_unlock(NFS_INO_INVALIDATING, bitlock);
|
||||
smp_mb__after_atomic();
|
||||
wake_up_bit(bitlock, NFS_INO_INVALIDATING);
|
||||
out:
|
||||
return ret;
|
||||
return nfs_clear_invalid_mapping(mapping);
|
||||
}
|
||||
|
||||
static bool nfs_file_has_writers(struct nfs_inode *nfsi)
|
||||
|
|
|
@ -111,6 +111,7 @@ struct posix_acl *nfs3_get_acl(struct inode *inode, int type)
|
|||
fallthrough;
|
||||
case -ENOTSUPP:
|
||||
status = -EOPNOTSUPP;
|
||||
goto getout;
|
||||
default:
|
||||
goto getout;
|
||||
}
|
||||
|
|
|
@ -609,6 +609,7 @@ found:
|
|||
* changed. Schedule recovery!
|
||||
*/
|
||||
nfs4_schedule_path_down_recovery(pos);
|
||||
goto out;
|
||||
default:
|
||||
goto out;
|
||||
}
|
||||
|
|
|
@ -71,10 +71,6 @@
|
|||
|
||||
#include "nfs4trace.h"
|
||||
|
||||
#ifdef CONFIG_NFS_V4_2
|
||||
#include "nfs42.h"
|
||||
#endif /* CONFIG_NFS_V4_2 */
|
||||
|
||||
#define NFSDBG_FACILITY NFSDBG_PROC
|
||||
|
||||
#define NFS4_BITMASK_SZ 3
|
||||
|
@ -2231,6 +2227,7 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct
|
|||
default:
|
||||
printk(KERN_ERR "NFS: %s: unhandled error "
|
||||
"%d.\n", __func__, err);
|
||||
fallthrough;
|
||||
case 0:
|
||||
case -ENOENT:
|
||||
case -EAGAIN:
|
||||
|
@ -5438,15 +5435,16 @@ static void nfs4_bitmask_adjust(__u32 *bitmask, struct inode *inode,
|
|||
|
||||
if (cache_validity & NFS_INO_INVALID_ATIME)
|
||||
bitmask[1] |= FATTR4_WORD1_TIME_ACCESS;
|
||||
if (cache_validity & NFS_INO_INVALID_ACCESS)
|
||||
bitmask[0] |= FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER |
|
||||
FATTR4_WORD1_OWNER_GROUP;
|
||||
if (cache_validity & NFS_INO_INVALID_ACL)
|
||||
bitmask[0] |= FATTR4_WORD0_ACL;
|
||||
if (cache_validity & NFS_INO_INVALID_LABEL)
|
||||
if (cache_validity & NFS_INO_INVALID_OTHER)
|
||||
bitmask[1] |= FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER |
|
||||
FATTR4_WORD1_OWNER_GROUP |
|
||||
FATTR4_WORD1_NUMLINKS;
|
||||
if (label && label->len && cache_validity & NFS_INO_INVALID_LABEL)
|
||||
bitmask[2] |= FATTR4_WORD2_SECURITY_LABEL;
|
||||
if (cache_validity & NFS_INO_INVALID_CTIME)
|
||||
if (cache_validity & NFS_INO_INVALID_CHANGE)
|
||||
bitmask[0] |= FATTR4_WORD0_CHANGE;
|
||||
if (cache_validity & NFS_INO_INVALID_CTIME)
|
||||
bitmask[1] |= FATTR4_WORD1_TIME_METADATA;
|
||||
if (cache_validity & NFS_INO_INVALID_MTIME)
|
||||
bitmask[1] |= FATTR4_WORD1_TIME_MODIFY;
|
||||
if (cache_validity & NFS_INO_INVALID_SIZE)
|
||||
|
@ -9708,6 +9706,7 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
|
|||
case -NFS4ERR_BADLAYOUT: /* no layout */
|
||||
case -NFS4ERR_GRACE: /* loca_reclaim always false */
|
||||
task->tk_status = 0;
|
||||
break;
|
||||
case 0:
|
||||
break;
|
||||
default:
|
||||
|
|
|
@ -1125,6 +1125,7 @@ static void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
|
|||
" sequence-id error on an"
|
||||
" unconfirmed sequence %p!\n",
|
||||
seqid->sequence);
|
||||
return;
|
||||
case -NFS4ERR_STALE_CLIENTID:
|
||||
case -NFS4ERR_STALE_STATEID:
|
||||
case -NFS4ERR_BAD_STATEID:
|
||||
|
|
|
@ -2875,6 +2875,7 @@ pnfs_do_write(struct nfs_pageio_descriptor *desc,
|
|||
switch (trypnfs) {
|
||||
case PNFS_NOT_ATTEMPTED:
|
||||
pnfs_write_through_mds(desc, hdr);
|
||||
break;
|
||||
case PNFS_ATTEMPTED:
|
||||
break;
|
||||
case PNFS_TRY_AGAIN:
|
||||
|
@ -3019,6 +3020,7 @@ pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr)
|
|||
switch (trypnfs) {
|
||||
case PNFS_NOT_ATTEMPTED:
|
||||
pnfs_read_through_mds(desc, hdr);
|
||||
break;
|
||||
case PNFS_ATTEMPTED:
|
||||
break;
|
||||
case PNFS_TRY_AGAIN:
|
||||
|
|
232
fs/nfs/read.c
232
fs/nfs/read.c
|
@ -74,6 +74,24 @@ void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(nfs_pageio_init_read);
|
||||
|
||||
static void nfs_pageio_complete_read(struct nfs_pageio_descriptor *pgio,
|
||||
struct inode *inode)
|
||||
{
|
||||
struct nfs_pgio_mirror *pgm;
|
||||
unsigned long npages;
|
||||
|
||||
nfs_pageio_complete(pgio);
|
||||
|
||||
/* It doesn't make sense to do mirrored reads! */
|
||||
WARN_ON_ONCE(pgio->pg_mirror_count != 1);
|
||||
|
||||
pgm = &pgio->pg_mirrors[0];
|
||||
NFS_I(inode)->read_io += pgm->pg_bytes_written;
|
||||
npages = (pgm->pg_bytes_written + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||
nfs_add_stats(inode, NFSIOS_READPAGES, npages);
|
||||
}
|
||||
|
||||
|
||||
void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
|
||||
{
|
||||
struct nfs_pgio_mirror *mirror;
|
||||
|
@ -114,41 +132,10 @@ static void nfs_readpage_release(struct nfs_page *req, int error)
|
|||
nfs_release_request(req);
|
||||
}
|
||||
|
||||
int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
|
||||
struct page *page)
|
||||
{
|
||||
struct nfs_page *new;
|
||||
unsigned int len;
|
||||
struct nfs_readdesc {
|
||||
struct nfs_pageio_descriptor pgio;
|
||||
struct nfs_pgio_mirror *pgm;
|
||||
|
||||
len = nfs_page_length(page);
|
||||
if (len == 0)
|
||||
return nfs_return_empty_page(page);
|
||||
new = nfs_create_request(ctx, page, 0, len);
|
||||
if (IS_ERR(new)) {
|
||||
unlock_page(page);
|
||||
return PTR_ERR(new);
|
||||
}
|
||||
if (len < PAGE_SIZE)
|
||||
zero_user_segment(page, len, PAGE_SIZE);
|
||||
|
||||
nfs_pageio_init_read(&pgio, inode, false,
|
||||
&nfs_async_read_completion_ops);
|
||||
if (!nfs_pageio_add_request(&pgio, new)) {
|
||||
nfs_list_remove_request(new);
|
||||
nfs_readpage_release(new, pgio.pg_error);
|
||||
}
|
||||
nfs_pageio_complete(&pgio);
|
||||
|
||||
/* It doesn't make sense to do mirrored reads! */
|
||||
WARN_ON_ONCE(pgio.pg_mirror_count != 1);
|
||||
|
||||
pgm = &pgio.pg_mirrors[0];
|
||||
NFS_I(inode)->read_io += pgm->pg_bytes_written;
|
||||
|
||||
return pgio.pg_error < 0 ? pgio.pg_error : 0;
|
||||
}
|
||||
struct nfs_open_context *ctx;
|
||||
};
|
||||
|
||||
static void nfs_page_group_set_uptodate(struct nfs_page *req)
|
||||
{
|
||||
|
@ -171,8 +158,7 @@ static void nfs_read_completion(struct nfs_pgio_header *hdr)
|
|||
|
||||
if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
|
||||
/* note: regions of the page not covered by a
|
||||
* request are zeroed in nfs_readpage_async /
|
||||
* readpage_async_filler */
|
||||
* request are zeroed in readpage_async_filler */
|
||||
if (bytes > hdr->good_bytes) {
|
||||
/* nothing in this request was good, so zero
|
||||
* the full extent of the request */
|
||||
|
@ -304,78 +290,10 @@ static void nfs_readpage_result(struct rpc_task *task,
|
|||
nfs_readpage_retry(task, hdr);
|
||||
}
|
||||
|
||||
/*
|
||||
* Read a page over NFS.
|
||||
* We read the page synchronously in the following case:
|
||||
* - The error flag is set for this page. This happens only when a
|
||||
* previous async read operation failed.
|
||||
*/
|
||||
int nfs_readpage(struct file *file, struct page *page)
|
||||
{
|
||||
struct nfs_open_context *ctx;
|
||||
struct inode *inode = page_file_mapping(page)->host;
|
||||
int error;
|
||||
|
||||
dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
|
||||
page, PAGE_SIZE, page_index(page));
|
||||
nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
|
||||
nfs_add_stats(inode, NFSIOS_READPAGES, 1);
|
||||
|
||||
/*
|
||||
* Try to flush any pending writes to the file..
|
||||
*
|
||||
* NOTE! Because we own the page lock, there cannot
|
||||
* be any new pending writes generated at this point
|
||||
* for this page (other pages can be written to).
|
||||
*/
|
||||
error = nfs_wb_page(inode, page);
|
||||
if (error)
|
||||
goto out_unlock;
|
||||
if (PageUptodate(page))
|
||||
goto out_unlock;
|
||||
|
||||
error = -ESTALE;
|
||||
if (NFS_STALE(inode))
|
||||
goto out_unlock;
|
||||
|
||||
if (file == NULL) {
|
||||
error = -EBADF;
|
||||
ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
|
||||
if (ctx == NULL)
|
||||
goto out_unlock;
|
||||
} else
|
||||
ctx = get_nfs_open_context(nfs_file_open_context(file));
|
||||
|
||||
if (!IS_SYNC(inode)) {
|
||||
error = nfs_readpage_from_fscache(ctx, inode, page);
|
||||
if (error == 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
xchg(&ctx->error, 0);
|
||||
error = nfs_readpage_async(ctx, inode, page);
|
||||
if (!error) {
|
||||
error = wait_on_page_locked_killable(page);
|
||||
if (!PageUptodate(page) && !error)
|
||||
error = xchg(&ctx->error, 0);
|
||||
}
|
||||
out:
|
||||
put_nfs_open_context(ctx);
|
||||
return error;
|
||||
out_unlock:
|
||||
unlock_page(page);
|
||||
return error;
|
||||
}
|
||||
|
||||
struct nfs_readdesc {
|
||||
struct nfs_pageio_descriptor *pgio;
|
||||
struct nfs_open_context *ctx;
|
||||
};
|
||||
|
||||
static int
|
||||
readpage_async_filler(void *data, struct page *page)
|
||||
{
|
||||
struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
|
||||
struct nfs_readdesc *desc = data;
|
||||
struct nfs_page *new;
|
||||
unsigned int len;
|
||||
int error;
|
||||
|
@ -390,9 +308,9 @@ readpage_async_filler(void *data, struct page *page)
|
|||
|
||||
if (len < PAGE_SIZE)
|
||||
zero_user_segment(page, len, PAGE_SIZE);
|
||||
if (!nfs_pageio_add_request(desc->pgio, new)) {
|
||||
if (!nfs_pageio_add_request(&desc->pgio, new)) {
|
||||
nfs_list_remove_request(new);
|
||||
error = desc->pgio->pg_error;
|
||||
error = desc->pgio.pg_error;
|
||||
nfs_readpage_release(new, error);
|
||||
goto out;
|
||||
}
|
||||
|
@ -404,17 +322,82 @@ out:
|
|||
return error;
|
||||
}
|
||||
|
||||
int nfs_readpages(struct file *filp, struct address_space *mapping,
|
||||
/*
|
||||
* Read a page over NFS.
|
||||
* We read the page synchronously in the following case:
|
||||
* - The error flag is set for this page. This happens only when a
|
||||
* previous async read operation failed.
|
||||
*/
|
||||
int nfs_readpage(struct file *file, struct page *page)
|
||||
{
|
||||
struct nfs_readdesc desc;
|
||||
struct inode *inode = page_file_mapping(page)->host;
|
||||
int ret;
|
||||
|
||||
dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
|
||||
page, PAGE_SIZE, page_index(page));
|
||||
nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
|
||||
|
||||
/*
|
||||
* Try to flush any pending writes to the file..
|
||||
*
|
||||
* NOTE! Because we own the page lock, there cannot
|
||||
* be any new pending writes generated at this point
|
||||
* for this page (other pages can be written to).
|
||||
*/
|
||||
ret = nfs_wb_page(inode, page);
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
if (PageUptodate(page))
|
||||
goto out_unlock;
|
||||
|
||||
ret = -ESTALE;
|
||||
if (NFS_STALE(inode))
|
||||
goto out_unlock;
|
||||
|
||||
if (file == NULL) {
|
||||
ret = -EBADF;
|
||||
desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
|
||||
if (desc.ctx == NULL)
|
||||
goto out_unlock;
|
||||
} else
|
||||
desc.ctx = get_nfs_open_context(nfs_file_open_context(file));
|
||||
|
||||
if (!IS_SYNC(inode)) {
|
||||
ret = nfs_readpage_from_fscache(desc.ctx, inode, page);
|
||||
if (ret == 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
xchg(&desc.ctx->error, 0);
|
||||
nfs_pageio_init_read(&desc.pgio, inode, false,
|
||||
&nfs_async_read_completion_ops);
|
||||
|
||||
ret = readpage_async_filler(&desc, page);
|
||||
|
||||
if (!ret)
|
||||
nfs_pageio_complete_read(&desc.pgio, inode);
|
||||
|
||||
ret = desc.pgio.pg_error < 0 ? desc.pgio.pg_error : 0;
|
||||
if (!ret) {
|
||||
ret = wait_on_page_locked_killable(page);
|
||||
if (!PageUptodate(page) && !ret)
|
||||
ret = xchg(&desc.ctx->error, 0);
|
||||
}
|
||||
out:
|
||||
put_nfs_open_context(desc.ctx);
|
||||
return ret;
|
||||
out_unlock:
|
||||
unlock_page(page);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int nfs_readpages(struct file *file, struct address_space *mapping,
|
||||
struct list_head *pages, unsigned nr_pages)
|
||||
{
|
||||
struct nfs_pageio_descriptor pgio;
|
||||
struct nfs_pgio_mirror *pgm;
|
||||
struct nfs_readdesc desc = {
|
||||
.pgio = &pgio,
|
||||
};
|
||||
struct nfs_readdesc desc;
|
||||
struct inode *inode = mapping->host;
|
||||
unsigned long npages;
|
||||
int ret = -ESTALE;
|
||||
int ret;
|
||||
|
||||
dprintk("NFS: nfs_readpages (%s/%Lu %d)\n",
|
||||
inode->i_sb->s_id,
|
||||
|
@ -422,15 +405,17 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
|
|||
nr_pages);
|
||||
nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);
|
||||
|
||||
ret = -ESTALE;
|
||||
if (NFS_STALE(inode))
|
||||
goto out;
|
||||
|
||||
if (filp == NULL) {
|
||||
if (file == NULL) {
|
||||
ret = -EBADF;
|
||||
desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
|
||||
if (desc.ctx == NULL)
|
||||
return -EBADF;
|
||||
goto out;
|
||||
} else
|
||||
desc.ctx = get_nfs_open_context(nfs_file_open_context(filp));
|
||||
desc.ctx = get_nfs_open_context(nfs_file_open_context(file));
|
||||
|
||||
/* attempt to read as many of the pages as possible from the cache
|
||||
* - this returns -ENOBUFS immediately if the cookie is negative
|
||||
|
@ -440,20 +425,13 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
|
|||
if (ret == 0)
|
||||
goto read_complete; /* all pages were read */
|
||||
|
||||
nfs_pageio_init_read(&pgio, inode, false,
|
||||
nfs_pageio_init_read(&desc.pgio, inode, false,
|
||||
&nfs_async_read_completion_ops);
|
||||
|
||||
ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
|
||||
nfs_pageio_complete(&pgio);
|
||||
|
||||
/* It doesn't make sense to do mirrored reads! */
|
||||
WARN_ON_ONCE(pgio.pg_mirror_count != 1);
|
||||
nfs_pageio_complete_read(&desc.pgio, inode);
|
||||
|
||||
pgm = &pgio.pg_mirrors[0];
|
||||
NFS_I(inode)->read_io += pgm->pg_bytes_written;
|
||||
npages = (pgm->pg_bytes_written + PAGE_SIZE - 1) >>
|
||||
PAGE_SHIFT;
|
||||
nfs_add_stats(inode, NFSIOS_READPAGES, npages);
|
||||
read_complete:
|
||||
put_nfs_open_context(desc.ctx);
|
||||
out:
|
||||
|
|
|
@ -523,6 +523,13 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
|
|||
seq_puts(m, ",local_lock=flock");
|
||||
else
|
||||
seq_puts(m, ",local_lock=posix");
|
||||
|
||||
if (nfss->flags & NFS_MOUNT_WRITE_EAGER) {
|
||||
if (nfss->flags & NFS_MOUNT_WRITE_WAIT)
|
||||
seq_puts(m, ",write=wait");
|
||||
else
|
||||
seq_puts(m, ",write=eager");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -712,16 +712,23 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
|
|||
{
|
||||
struct inode *inode = mapping->host;
|
||||
struct nfs_pageio_descriptor pgio;
|
||||
struct nfs_io_completion *ioc;
|
||||
struct nfs_io_completion *ioc = NULL;
|
||||
unsigned int mntflags = NFS_SERVER(inode)->flags;
|
||||
int priority = 0;
|
||||
int err;
|
||||
|
||||
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
|
||||
|
||||
ioc = nfs_io_completion_alloc(GFP_KERNEL);
|
||||
if (ioc)
|
||||
nfs_io_completion_init(ioc, nfs_io_completion_commit, inode);
|
||||
if (!(mntflags & NFS_MOUNT_WRITE_EAGER) || wbc->for_kupdate ||
|
||||
wbc->for_background || wbc->for_sync || wbc->for_reclaim) {
|
||||
ioc = nfs_io_completion_alloc(GFP_KERNEL);
|
||||
if (ioc)
|
||||
nfs_io_completion_init(ioc, nfs_io_completion_commit,
|
||||
inode);
|
||||
priority = wb_priority(wbc);
|
||||
}
|
||||
|
||||
nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false,
|
||||
nfs_pageio_init_write(&pgio, inode, priority, false,
|
||||
&nfs_async_write_completion_ops);
|
||||
pgio.pg_io_completion = ioc;
|
||||
err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
|
||||
|
@ -1278,19 +1285,21 @@ bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx, struct inode *inode)
|
|||
* the PageUptodate() flag. In this case, we will need to turn off
|
||||
* write optimisations that depend on the page contents being correct.
|
||||
*/
|
||||
static bool nfs_write_pageuptodate(struct page *page, struct inode *inode)
|
||||
static bool nfs_write_pageuptodate(struct page *page, struct inode *inode,
|
||||
unsigned int pagelen)
|
||||
{
|
||||
struct nfs_inode *nfsi = NFS_I(inode);
|
||||
|
||||
if (nfs_have_delegated_attributes(inode))
|
||||
goto out;
|
||||
if (nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
|
||||
if (nfsi->cache_validity &
|
||||
(NFS_INO_REVAL_PAGECACHE | NFS_INO_INVALID_SIZE))
|
||||
return false;
|
||||
smp_rmb();
|
||||
if (test_bit(NFS_INO_INVALIDATING, &nfsi->flags))
|
||||
if (test_bit(NFS_INO_INVALIDATING, &nfsi->flags) && pagelen != 0)
|
||||
return false;
|
||||
out:
|
||||
if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
|
||||
if (nfsi->cache_validity & NFS_INO_INVALID_DATA && pagelen != 0)
|
||||
return false;
|
||||
return PageUptodate(page) != 0;
|
||||
}
|
||||
|
@ -1310,7 +1319,8 @@ is_whole_file_wrlock(struct file_lock *fl)
|
|||
* If the file is opened for synchronous writes then we can just skip the rest
|
||||
* of the checks.
|
||||
*/
|
||||
static int nfs_can_extend_write(struct file *file, struct page *page, struct inode *inode)
|
||||
static int nfs_can_extend_write(struct file *file, struct page *page,
|
||||
struct inode *inode, unsigned int pagelen)
|
||||
{
|
||||
int ret;
|
||||
struct file_lock_context *flctx = inode->i_flctx;
|
||||
|
@ -1318,7 +1328,7 @@ static int nfs_can_extend_write(struct file *file, struct page *page, struct ino
|
|||
|
||||
if (file->f_flags & O_DSYNC)
|
||||
return 0;
|
||||
if (!nfs_write_pageuptodate(page, inode))
|
||||
if (!nfs_write_pageuptodate(page, inode, pagelen))
|
||||
return 0;
|
||||
if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
|
||||
return 1;
|
||||
|
@ -1356,6 +1366,7 @@ int nfs_updatepage(struct file *file, struct page *page,
|
|||
struct nfs_open_context *ctx = nfs_file_open_context(file);
|
||||
struct address_space *mapping = page_file_mapping(page);
|
||||
struct inode *inode = mapping->host;
|
||||
unsigned int pagelen = nfs_page_length(page);
|
||||
int status = 0;
|
||||
|
||||
nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
|
||||
|
@ -1366,8 +1377,8 @@ int nfs_updatepage(struct file *file, struct page *page,
|
|||
if (!count)
|
||||
goto out;
|
||||
|
||||
if (nfs_can_extend_write(file, page, inode)) {
|
||||
count = max(count + offset, nfs_page_length(page));
|
||||
if (nfs_can_extend_write(file, page, inode, pagelen)) {
|
||||
count = max(count + offset, pagelen);
|
||||
offset = 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -388,6 +388,7 @@ extern int nfs_open(struct inode *, struct file *);
|
|||
extern int nfs_attribute_cache_expired(struct inode *inode);
|
||||
extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode);
|
||||
extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *);
|
||||
extern int nfs_clear_invalid_mapping(struct address_space *mapping);
|
||||
extern bool nfs_mapping_need_revalidate_inode(struct inode *inode);
|
||||
extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping);
|
||||
extern int nfs_revalidate_mapping_rcu(struct inode *inode);
|
||||
|
@ -571,8 +572,6 @@ nfs_have_writebacks(struct inode *inode)
|
|||
extern int nfs_readpage(struct file *, struct page *);
|
||||
extern int nfs_readpages(struct file *, struct address_space *,
|
||||
struct list_head *, unsigned);
|
||||
extern int nfs_readpage_async(struct nfs_open_context *, struct inode *,
|
||||
struct page *);
|
||||
|
||||
/*
|
||||
* inline functions
|
||||
|
|
|
@ -142,7 +142,7 @@ struct nfs_server {
|
|||
struct nlm_host *nlm_host; /* NLM client handle */
|
||||
struct nfs_iostats __percpu *io_stats; /* I/O statistics */
|
||||
atomic_long_t writeback; /* number of writeback pages */
|
||||
int flags; /* various flags */
|
||||
unsigned int flags; /* various flags */
|
||||
|
||||
/* The following are for internal use only. Also see uapi/linux/nfs_mount.h */
|
||||
#define NFS_MOUNT_LOOKUP_CACHE_NONEG 0x10000
|
||||
|
@ -153,6 +153,8 @@ struct nfs_server {
|
|||
#define NFS_MOUNT_LOCAL_FCNTL 0x200000
|
||||
#define NFS_MOUNT_SOFTERR 0x400000
|
||||
#define NFS_MOUNT_SOFTREVAL 0x800000
|
||||
#define NFS_MOUNT_WRITE_EAGER 0x01000000
|
||||
#define NFS_MOUNT_WRITE_WAIT 0x02000000
|
||||
|
||||
unsigned int caps; /* server capabilities */
|
||||
unsigned int rsize; /* read size */
|
||||
|
|
|
@ -60,6 +60,51 @@ DECLARE_EVENT_CLASS(rpcrdma_completion_class,
|
|||
), \
|
||||
TP_ARGS(wc, cid))
|
||||
|
||||
DECLARE_EVENT_CLASS(rpcrdma_receive_completion_class,
|
||||
TP_PROTO(
|
||||
const struct ib_wc *wc,
|
||||
const struct rpc_rdma_cid *cid
|
||||
),
|
||||
|
||||
TP_ARGS(wc, cid),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(u32, cq_id)
|
||||
__field(int, completion_id)
|
||||
__field(u32, received)
|
||||
__field(unsigned long, status)
|
||||
__field(unsigned int, vendor_err)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->cq_id = cid->ci_queue_id;
|
||||
__entry->completion_id = cid->ci_completion_id;
|
||||
__entry->status = wc->status;
|
||||
if (wc->status) {
|
||||
__entry->received = 0;
|
||||
__entry->vendor_err = wc->vendor_err;
|
||||
} else {
|
||||
__entry->received = wc->byte_len;
|
||||
__entry->vendor_err = 0;
|
||||
}
|
||||
),
|
||||
|
||||
TP_printk("cq.id=%u cid=%d status=%s (%lu/0x%x) received=%u",
|
||||
__entry->cq_id, __entry->completion_id,
|
||||
rdma_show_wc_status(__entry->status),
|
||||
__entry->status, __entry->vendor_err,
|
||||
__entry->received
|
||||
)
|
||||
);
|
||||
|
||||
#define DEFINE_RECEIVE_COMPLETION_EVENT(name) \
|
||||
DEFINE_EVENT(rpcrdma_receive_completion_class, name, \
|
||||
TP_PROTO( \
|
||||
const struct ib_wc *wc, \
|
||||
const struct rpc_rdma_cid *cid \
|
||||
), \
|
||||
TP_ARGS(wc, cid))
|
||||
|
||||
DECLARE_EVENT_CLASS(xprtrdma_reply_class,
|
||||
TP_PROTO(
|
||||
const struct rpcrdma_rep *rep
|
||||
|
@ -838,7 +883,8 @@ TRACE_EVENT(xprtrdma_post_linv_err,
|
|||
** Completion events
|
||||
**/
|
||||
|
||||
DEFINE_COMPLETION_EVENT(xprtrdma_wc_receive);
|
||||
DEFINE_RECEIVE_COMPLETION_EVENT(xprtrdma_wc_receive);
|
||||
|
||||
DEFINE_COMPLETION_EVENT(xprtrdma_wc_send);
|
||||
DEFINE_COMPLETION_EVENT(xprtrdma_wc_fastreg);
|
||||
DEFINE_COMPLETION_EVENT(xprtrdma_wc_li);
|
||||
|
@ -1790,7 +1836,7 @@ TRACE_EVENT(svcrdma_post_recv,
|
|||
)
|
||||
);
|
||||
|
||||
DEFINE_COMPLETION_EVENT(svcrdma_wc_receive);
|
||||
DEFINE_RECEIVE_COMPLETION_EVENT(svcrdma_wc_receive);
|
||||
|
||||
TRACE_EVENT(svcrdma_rq_post_err,
|
||||
TP_PROTO(
|
||||
|
|
|
@ -478,6 +478,7 @@ rpc_get_inode(struct super_block *sb, umode_t mode)
|
|||
inode->i_fop = &simple_dir_operations;
|
||||
inode->i_op = &simple_dir_inode_operations;
|
||||
inc_nlink(inode);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
/*
|
||||
* Copyright (c) 2015-2020, Oracle and/or its affiliates.
|
||||
*
|
||||
* Support for backward direction RPCs on RPC/RDMA.
|
||||
* Support for reverse-direction RPCs on RPC/RDMA.
|
||||
*/
|
||||
|
||||
#include <linux/sunrpc/xprt.h>
|
||||
|
@ -208,7 +208,7 @@ create_req:
|
|||
}
|
||||
|
||||
/**
|
||||
* rpcrdma_bc_receive_call - Handle a backward direction call
|
||||
* rpcrdma_bc_receive_call - Handle a reverse-direction Call
|
||||
* @r_xprt: transport receiving the call
|
||||
* @rep: receive buffer containing the call
|
||||
*
|
||||
|
|
|
@ -306,20 +306,14 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
|
|||
if (nsegs > ep->re_max_fr_depth)
|
||||
nsegs = ep->re_max_fr_depth;
|
||||
for (i = 0; i < nsegs;) {
|
||||
if (seg->mr_page)
|
||||
sg_set_page(&mr->mr_sg[i],
|
||||
seg->mr_page,
|
||||
seg->mr_len,
|
||||
offset_in_page(seg->mr_offset));
|
||||
else
|
||||
sg_set_buf(&mr->mr_sg[i], seg->mr_offset,
|
||||
seg->mr_len);
|
||||
sg_set_page(&mr->mr_sg[i], seg->mr_page,
|
||||
seg->mr_len, seg->mr_offset);
|
||||
|
||||
++seg;
|
||||
++i;
|
||||
if (ep->re_mrtype == IB_MR_TYPE_SG_GAPS)
|
||||
continue;
|
||||
if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
|
||||
if ((i < nsegs && seg->mr_offset) ||
|
||||
offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -204,9 +204,7 @@ rpcrdma_alloc_sparse_pages(struct xdr_buf *buf)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* Split @vec on page boundaries into SGEs. FMR registers pages, not
|
||||
* a byte range. Other modes coalesce these SGEs into a single MR
|
||||
* when they can.
|
||||
/* Convert @vec to a single SGL element.
|
||||
*
|
||||
* Returns pointer to next available SGE, and bumps the total number
|
||||
* of SGEs consumed.
|
||||
|
@ -215,22 +213,11 @@ static struct rpcrdma_mr_seg *
|
|||
rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg,
|
||||
unsigned int *n)
|
||||
{
|
||||
u32 remaining, page_offset;
|
||||
char *base;
|
||||
|
||||
base = vec->iov_base;
|
||||
page_offset = offset_in_page(base);
|
||||
remaining = vec->iov_len;
|
||||
while (remaining) {
|
||||
seg->mr_page = NULL;
|
||||
seg->mr_offset = base;
|
||||
seg->mr_len = min_t(u32, PAGE_SIZE - page_offset, remaining);
|
||||
remaining -= seg->mr_len;
|
||||
base += seg->mr_len;
|
||||
++seg;
|
||||
++(*n);
|
||||
page_offset = 0;
|
||||
}
|
||||
seg->mr_page = virt_to_page(vec->iov_base);
|
||||
seg->mr_offset = offset_in_page(vec->iov_base);
|
||||
seg->mr_len = vec->iov_len;
|
||||
++seg;
|
||||
++(*n);
|
||||
return seg;
|
||||
}
|
||||
|
||||
|
@ -259,7 +246,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
|
|||
page_base = offset_in_page(xdrbuf->page_base);
|
||||
while (len) {
|
||||
seg->mr_page = *ppages;
|
||||
seg->mr_offset = (char *)page_base;
|
||||
seg->mr_offset = page_base;
|
||||
seg->mr_len = min_t(u32, PAGE_SIZE - page_base, len);
|
||||
len -= seg->mr_len;
|
||||
++ppages;
|
||||
|
@ -268,10 +255,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
|
|||
page_base = 0;
|
||||
}
|
||||
|
||||
/* When encoding a Read chunk, the tail iovec contains an
|
||||
* XDR pad and may be omitted.
|
||||
*/
|
||||
if (type == rpcrdma_readch && r_xprt->rx_ep->re_implicit_roundup)
|
||||
if (type == rpcrdma_readch)
|
||||
goto out;
|
||||
|
||||
/* When encoding a Write chunk, some servers need to see an
|
||||
|
@ -283,7 +267,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
|
|||
goto out;
|
||||
|
||||
if (xdrbuf->tail[0].iov_len)
|
||||
seg = rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, &n);
|
||||
rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, &n);
|
||||
|
||||
out:
|
||||
if (unlikely(n > RPCRDMA_MAX_SEGS))
|
||||
|
@ -644,9 +628,8 @@ out_mapping_err:
|
|||
return false;
|
||||
}
|
||||
|
||||
/* The tail iovec may include an XDR pad for the page list,
|
||||
* as well as additional content, and may not reside in the
|
||||
* same page as the head iovec.
|
||||
/* The tail iovec might not reside in the same page as the
|
||||
* head iovec.
|
||||
*/
|
||||
static bool rpcrdma_prepare_tail_iov(struct rpcrdma_req *req,
|
||||
struct xdr_buf *xdr,
|
||||
|
@ -764,27 +747,19 @@ static bool rpcrdma_prepare_readch(struct rpcrdma_xprt *r_xprt,
|
|||
struct rpcrdma_req *req,
|
||||
struct xdr_buf *xdr)
|
||||
{
|
||||
struct kvec *tail = &xdr->tail[0];
|
||||
|
||||
if (!rpcrdma_prepare_head_iov(r_xprt, req, xdr->head[0].iov_len))
|
||||
return false;
|
||||
|
||||
/* If there is a Read chunk, the page list is being handled
|
||||
/* If there is a Read chunk, the page list is handled
|
||||
* via explicit RDMA, and thus is skipped here.
|
||||
*/
|
||||
|
||||
/* Do not include the tail if it is only an XDR pad */
|
||||
if (xdr->tail[0].iov_len > 3) {
|
||||
unsigned int page_base, len;
|
||||
|
||||
/* If the content in the page list is an odd length,
|
||||
* xdr_write_pages() adds a pad at the beginning of
|
||||
* the tail iovec. Force the tail's non-pad content to
|
||||
* land at the next XDR position in the Send message.
|
||||
*/
|
||||
page_base = offset_in_page(xdr->tail[0].iov_base);
|
||||
len = xdr->tail[0].iov_len;
|
||||
page_base += len & 3;
|
||||
len -= len & 3;
|
||||
if (!rpcrdma_prepare_tail_iov(req, xdr, page_base, len))
|
||||
if (tail->iov_len) {
|
||||
if (!rpcrdma_prepare_tail_iov(req, xdr,
|
||||
offset_in_page(tail->iov_base),
|
||||
tail->iov_len))
|
||||
return false;
|
||||
kref_get(&req->rl_kref);
|
||||
}
|
||||
|
@ -1164,14 +1139,10 @@ rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
|
|||
*/
|
||||
p = xdr_inline_decode(xdr, 3 * sizeof(*p));
|
||||
if (unlikely(!p))
|
||||
goto out_short;
|
||||
return true;
|
||||
|
||||
rpcrdma_bc_receive_call(r_xprt, rep);
|
||||
return true;
|
||||
|
||||
out_short:
|
||||
pr_warn("RPC/RDMA short backward direction call\n");
|
||||
return true;
|
||||
}
|
||||
#else /* CONFIG_SUNRPC_BACKCHANNEL */
|
||||
{
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
/*
|
||||
* Copyright (c) 2015-2018 Oracle. All rights reserved.
|
||||
*
|
||||
* Support for backward direction RPCs on RPC/RDMA (server-side).
|
||||
* Support for reverse-direction RPCs on RPC/RDMA (server-side).
|
||||
*/
|
||||
|
||||
#include <linux/sunrpc/svc_rdma.h>
|
||||
|
@ -59,7 +59,7 @@ out_unlock:
|
|||
spin_unlock(&xprt->queue_lock);
|
||||
}
|
||||
|
||||
/* Send a backwards direction RPC call.
|
||||
/* Send a reverse-direction RPC Call.
|
||||
*
|
||||
* Caller holds the connection's mutex and has already marshaled
|
||||
* the RPC/RDMA request.
|
||||
|
|
|
@ -98,9 +98,9 @@ struct rpcrdma_ep {
|
|||
atomic_t re_completion_ids;
|
||||
};
|
||||
|
||||
/* Pre-allocate extra Work Requests for handling backward receives
|
||||
* and sends. This is a fixed value because the Work Queues are
|
||||
* allocated when the forward channel is set up, long before the
|
||||
/* Pre-allocate extra Work Requests for handling reverse-direction
|
||||
* Receives and Sends. This is a fixed value because the Work Queues
|
||||
* are allocated when the forward channel is set up, long before the
|
||||
* backchannel is provisioned. This value is two times
|
||||
* NFS4_DEF_CB_SLOT_TABLE_SIZE.
|
||||
*/
|
||||
|
@ -283,10 +283,11 @@ enum {
|
|||
RPCRDMA_MAX_IOV_SEGS,
|
||||
};
|
||||
|
||||
struct rpcrdma_mr_seg { /* chunk descriptors */
|
||||
u32 mr_len; /* length of chunk or segment */
|
||||
struct page *mr_page; /* owning page, if any */
|
||||
char *mr_offset; /* kva if no page, else offset */
|
||||
/* Arguments for DMA mapping and registration */
|
||||
struct rpcrdma_mr_seg {
|
||||
u32 mr_len; /* length of segment */
|
||||
struct page *mr_page; /* underlying struct page */
|
||||
u64 mr_offset; /* IN: page offset, OUT: iova */
|
||||
};
|
||||
|
||||
/* The Send SGE array is provisioned to send a maximum size
|
||||
|
|
|
@ -829,7 +829,7 @@ xs_stream_record_marker(struct xdr_buf *xdr)
|
|||
* EAGAIN: The socket was blocked, please call again later to
|
||||
* complete the request
|
||||
* ENOTCONN: Caller needs to invoke connect logic then call again
|
||||
* other: Some other error occured, the request was not sent
|
||||
* other: Some other error occurred, the request was not sent
|
||||
*/
|
||||
static int xs_local_send_request(struct rpc_rqst *req)
|
||||
{
|
||||
|
@ -1665,7 +1665,7 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock)
|
|||
* This ensures that we can continue to establish TCP
|
||||
* connections even when all local ephemeral ports are already
|
||||
* a part of some TCP connection. This makes no difference
|
||||
* for UDP sockets, but also doens't harm them.
|
||||
* for UDP sockets, but also doesn't harm them.
|
||||
*
|
||||
* If we're asking for any reserved port (i.e. port == 0 &&
|
||||
* transport->xprt.resvport == 1) xs_get_srcport above will
|
||||
|
@ -1875,6 +1875,7 @@ static int xs_local_setup_socket(struct sock_xprt *transport)
|
|||
xprt->stat.connect_time += (long)jiffies -
|
||||
xprt->stat.connect_start;
|
||||
xprt_set_connected(xprt);
|
||||
break;
|
||||
case -ENOBUFS:
|
||||
break;
|
||||
case -ENOENT:
|
||||
|
@ -2276,10 +2277,8 @@ static void xs_tcp_setup_socket(struct work_struct *work)
|
|||
case -EHOSTUNREACH:
|
||||
case -EADDRINUSE:
|
||||
case -ENOBUFS:
|
||||
/*
|
||||
* xs_tcp_force_close() wakes tasks with -EIO.
|
||||
* We need to wake them first to ensure the
|
||||
* correct error code.
|
||||
/* xs_tcp_force_close() wakes tasks with a fixed error code.
|
||||
* We need to wake them first to ensure the correct error code.
|
||||
*/
|
||||
xprt_wake_pending_tasks(xprt, status);
|
||||
xs_tcp_force_close(xprt);
|
||||
|
@ -2380,7 +2379,7 @@ static void xs_error_handle(struct work_struct *work)
|
|||
}
|
||||
|
||||
/**
|
||||
* xs_local_print_stats - display AF_LOCAL socket-specifc stats
|
||||
* xs_local_print_stats - display AF_LOCAL socket-specific stats
|
||||
* @xprt: rpc_xprt struct containing statistics
|
||||
* @seq: output file
|
||||
*
|
||||
|
@ -2409,7 +2408,7 @@ static void xs_local_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
|
|||
}
|
||||
|
||||
/**
|
||||
* xs_udp_print_stats - display UDP socket-specifc stats
|
||||
* xs_udp_print_stats - display UDP socket-specific stats
|
||||
* @xprt: rpc_xprt struct containing statistics
|
||||
* @seq: output file
|
||||
*
|
||||
|
@ -2433,7 +2432,7 @@ static void xs_udp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
|
|||
}
|
||||
|
||||
/**
|
||||
* xs_tcp_print_stats - display TCP socket-specifc stats
|
||||
* xs_tcp_print_stats - display TCP socket-specific stats
|
||||
* @xprt: rpc_xprt struct containing statistics
|
||||
* @seq: output file
|
||||
*
|
||||
|
|
Loading…
Reference in New Issue