nfs: handle multiple reqs in nfs_page_async_flush
Change nfs_find_and_lock_request so nfs_page_async_flush can handle multiple requests in a page. There is only one request for a page the first time nfs_page_async_flush is called, but if a write or commit fails, async_flush is called again and there may be multiple requests associated with the page. The solution is to merge all the requests in a page group into a single request before calling nfs_pageio_add_request. Rename nfs_find_and_lock_request to nfs_lock_and_join_requests and change it to first lock all requests for the page, then cancel and merge all subrequests into the head request. Signed-off-by: Weston Andros Adamson <dros@primarydata.com> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
This commit is contained in:
parent
84d3a9a913
commit
d458138353
|
@ -244,6 +244,7 @@ void nfs_pgio_data_release(struct nfs_pgio_data *);
|
|||
int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *);
|
||||
int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *,
|
||||
const struct rpc_call_ops *, int, int);
|
||||
void nfs_free_request(struct nfs_page *req);
|
||||
|
||||
static inline void nfs_iocounter_init(struct nfs_io_counter *c)
|
||||
{
|
||||
|
|
|
@ -29,8 +29,6 @@
|
|||
static struct kmem_cache *nfs_page_cachep;
|
||||
static const struct rpc_call_ops nfs_pgio_common_ops;
|
||||
|
||||
static void nfs_free_request(struct nfs_page *);
|
||||
|
||||
static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
|
||||
{
|
||||
p->npages = pagecount;
|
||||
|
@ -406,7 +404,7 @@ static void nfs_clear_request(struct nfs_page *req)
|
|||
*
|
||||
* Note: Should never be called with the spinlock held!
|
||||
*/
|
||||
static void nfs_free_request(struct nfs_page *req)
|
||||
void nfs_free_request(struct nfs_page *req)
|
||||
{
|
||||
WARN_ON_ONCE(req->wb_this_page != req);
|
||||
|
||||
|
|
265
fs/nfs/write.c
265
fs/nfs/write.c
|
@ -46,6 +46,7 @@ static const struct rpc_call_ops nfs_commit_ops;
|
|||
static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
|
||||
static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
|
||||
static const struct nfs_rw_ops nfs_rw_write_ops;
|
||||
static void nfs_clear_request_commit(struct nfs_page *req);
|
||||
|
||||
static struct kmem_cache *nfs_wdata_cachep;
|
||||
static mempool_t *nfs_wdata_mempool;
|
||||
|
@ -289,36 +290,246 @@ static void nfs_end_page_writeback(struct nfs_page *req)
|
|||
clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
|
||||
}
|
||||
|
||||
static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblock)
|
||||
|
||||
/* nfs_page_group_clear_bits
|
||||
* @req - an nfs request
|
||||
* clears all page group related bits from @req
|
||||
*/
|
||||
static void
|
||||
nfs_page_group_clear_bits(struct nfs_page *req)
|
||||
{
|
||||
struct inode *inode = page_file_mapping(page)->host;
|
||||
struct nfs_page *req;
|
||||
clear_bit(PG_TEARDOWN, &req->wb_flags);
|
||||
clear_bit(PG_UNLOCKPAGE, &req->wb_flags);
|
||||
clear_bit(PG_UPTODATE, &req->wb_flags);
|
||||
clear_bit(PG_WB_END, &req->wb_flags);
|
||||
clear_bit(PG_REMOVE, &req->wb_flags);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* nfs_unroll_locks_and_wait - unlock all newly locked reqs and wait on @req
|
||||
*
|
||||
* this is a helper function for nfs_lock_and_join_requests
|
||||
*
|
||||
* @inode - inode associated with request page group, must be holding inode lock
|
||||
* @head - head request of page group, must be holding head lock
|
||||
* @req - request that couldn't lock and needs to wait on the req bit lock
|
||||
* @nonblock - if true, don't actually wait
|
||||
*
|
||||
* NOTE: this must be called holding page_group bit lock and inode spin lock
|
||||
* and BOTH will be released before returning.
|
||||
*
|
||||
* returns 0 on success, < 0 on error.
|
||||
*/
|
||||
static int
|
||||
nfs_unroll_locks_and_wait(struct inode *inode, struct nfs_page *head,
|
||||
struct nfs_page *req, bool nonblock)
|
||||
__releases(&inode->i_lock)
|
||||
{
|
||||
struct nfs_page *tmp;
|
||||
int ret;
|
||||
|
||||
spin_lock(&inode->i_lock);
|
||||
for (;;) {
|
||||
req = nfs_page_find_head_request_locked(NFS_I(inode), page);
|
||||
if (req == NULL)
|
||||
break;
|
||||
if (nfs_lock_request(req))
|
||||
break;
|
||||
/* Note: If we hold the page lock, as is the case in nfs_writepage,
|
||||
* then the call to nfs_lock_request() will always
|
||||
* succeed provided that someone hasn't already marked the
|
||||
* request as dirty (in which case we don't care).
|
||||
*/
|
||||
spin_unlock(&inode->i_lock);
|
||||
if (!nonblock)
|
||||
ret = nfs_wait_on_request(req);
|
||||
else
|
||||
ret = -EAGAIN;
|
||||
nfs_release_request(req);
|
||||
if (ret != 0)
|
||||
return ERR_PTR(ret);
|
||||
spin_lock(&inode->i_lock);
|
||||
}
|
||||
/* relinquish all the locks successfully grabbed this run */
|
||||
for (tmp = head ; tmp != req; tmp = tmp->wb_this_page)
|
||||
nfs_unlock_request(tmp);
|
||||
|
||||
WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags));
|
||||
|
||||
/* grab a ref on the request that will be waited on */
|
||||
kref_get(&req->wb_kref);
|
||||
|
||||
nfs_page_group_unlock(head);
|
||||
spin_unlock(&inode->i_lock);
|
||||
return req;
|
||||
|
||||
/* release ref from nfs_page_find_head_request_locked */
|
||||
nfs_release_request(head);
|
||||
|
||||
if (!nonblock)
|
||||
ret = nfs_wait_on_request(req);
|
||||
else
|
||||
ret = -EAGAIN;
|
||||
nfs_release_request(req);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* nfs_destroy_unlinked_subrequests - destroy recently unlinked subrequests
|
||||
*
|
||||
* @destroy_list - request list (using wb_this_page) terminated by @old_head
|
||||
* @old_head - the old head of the list
|
||||
*
|
||||
* All subrequests must be locked and removed from all lists, so at this point
|
||||
* they are only "active" in this function, and possibly in nfs_wait_on_request
|
||||
* with a reference held by some other context.
|
||||
*/
|
||||
static void
|
||||
nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
|
||||
struct nfs_page *old_head)
|
||||
{
|
||||
while (destroy_list) {
|
||||
struct nfs_page *subreq = destroy_list;
|
||||
|
||||
destroy_list = (subreq->wb_this_page == old_head) ?
|
||||
NULL : subreq->wb_this_page;
|
||||
|
||||
WARN_ON_ONCE(old_head != subreq->wb_head);
|
||||
|
||||
/* make sure old group is not used */
|
||||
subreq->wb_head = subreq;
|
||||
subreq->wb_this_page = subreq;
|
||||
|
||||
nfs_clear_request_commit(subreq);
|
||||
|
||||
/* subreq is now totally disconnected from page group or any
|
||||
* write / commit lists. last chance to wake any waiters */
|
||||
nfs_unlock_request(subreq);
|
||||
|
||||
if (!test_bit(PG_TEARDOWN, &subreq->wb_flags)) {
|
||||
/* release ref on old head request */
|
||||
nfs_release_request(old_head);
|
||||
|
||||
nfs_page_group_clear_bits(subreq);
|
||||
|
||||
/* release the PG_INODE_REF reference */
|
||||
if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags))
|
||||
nfs_release_request(subreq);
|
||||
else
|
||||
WARN_ON_ONCE(1);
|
||||
} else {
|
||||
WARN_ON_ONCE(test_bit(PG_CLEAN, &subreq->wb_flags));
|
||||
/* zombie requests have already released the last
|
||||
* reference and were waiting on the rest of the
|
||||
* group to complete. Since it's no longer part of a
|
||||
* group, simply free the request */
|
||||
nfs_page_group_clear_bits(subreq);
|
||||
nfs_free_request(subreq);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* nfs_lock_and_join_requests - join all subreqs to the head req and return
|
||||
* a locked reference, cancelling any pending
|
||||
* operations for this page.
|
||||
*
|
||||
* @page - the page used to lookup the "page group" of nfs_page structures
|
||||
* @nonblock - if true, don't block waiting for request locks
|
||||
*
|
||||
* This function joins all sub requests to the head request by first
|
||||
* locking all requests in the group, cancelling any pending operations
|
||||
* and finally updating the head request to cover the whole range covered by
|
||||
* the (former) group. All subrequests are removed from any write or commit
|
||||
* lists, unlinked from the group and destroyed.
|
||||
*
|
||||
* Returns a locked, referenced pointer to the head request - which after
|
||||
* this call is guaranteed to be the only request associated with the page.
|
||||
* Returns NULL if no requests are found for @page, or a ERR_PTR if an
|
||||
* error was encountered.
|
||||
*/
|
||||
static struct nfs_page *
|
||||
nfs_lock_and_join_requests(struct page *page, bool nonblock)
|
||||
{
|
||||
struct inode *inode = page_file_mapping(page)->host;
|
||||
struct nfs_page *head, *subreq;
|
||||
struct nfs_page *destroy_list = NULL;
|
||||
unsigned int total_bytes;
|
||||
int ret;
|
||||
|
||||
try_again:
|
||||
total_bytes = 0;
|
||||
|
||||
WARN_ON_ONCE(destroy_list);
|
||||
|
||||
spin_lock(&inode->i_lock);
|
||||
|
||||
/*
|
||||
* A reference is taken only on the head request which acts as a
|
||||
* reference to the whole page group - the group will not be destroyed
|
||||
* until the head reference is released.
|
||||
*/
|
||||
head = nfs_page_find_head_request_locked(NFS_I(inode), page);
|
||||
|
||||
if (!head) {
|
||||
spin_unlock(&inode->i_lock);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* lock each request in the page group */
|
||||
nfs_page_group_lock(head);
|
||||
subreq = head;
|
||||
do {
|
||||
/*
|
||||
* Subrequests are always contiguous, non overlapping
|
||||
* and in order. If not, it's a programming error.
|
||||
*/
|
||||
WARN_ON_ONCE(subreq->wb_offset !=
|
||||
(head->wb_offset + total_bytes));
|
||||
|
||||
/* keep track of how many bytes this group covers */
|
||||
total_bytes += subreq->wb_bytes;
|
||||
|
||||
if (!nfs_lock_request(subreq)) {
|
||||
/* releases page group bit lock and
|
||||
* inode spin lock and all references */
|
||||
ret = nfs_unroll_locks_and_wait(inode, head,
|
||||
subreq, nonblock);
|
||||
|
||||
if (ret == 0)
|
||||
goto try_again;
|
||||
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
subreq = subreq->wb_this_page;
|
||||
} while (subreq != head);
|
||||
|
||||
/* Now that all requests are locked, make sure they aren't on any list.
|
||||
* Commit list removal accounting is done after locks are dropped */
|
||||
subreq = head;
|
||||
do {
|
||||
nfs_list_remove_request(subreq);
|
||||
subreq = subreq->wb_this_page;
|
||||
} while (subreq != head);
|
||||
|
||||
/* unlink subrequests from head, destroy them later */
|
||||
if (head->wb_this_page != head) {
|
||||
/* destroy list will be terminated by head */
|
||||
destroy_list = head->wb_this_page;
|
||||
head->wb_this_page = head;
|
||||
|
||||
/* change head request to cover whole range that
|
||||
* the former page group covered */
|
||||
head->wb_bytes = total_bytes;
|
||||
}
|
||||
|
||||
/*
|
||||
* prepare head request to be added to new pgio descriptor
|
||||
*/
|
||||
nfs_page_group_clear_bits(head);
|
||||
|
||||
/*
|
||||
* some part of the group was still on the inode list - otherwise
|
||||
* the group wouldn't be involved in async write.
|
||||
* grab a reference for the head request, iff it needs one.
|
||||
*/
|
||||
if (!test_and_set_bit(PG_INODE_REF, &head->wb_flags))
|
||||
kref_get(&head->wb_kref);
|
||||
|
||||
nfs_page_group_unlock(head);
|
||||
|
||||
/* drop lock to clear_request_commit the head req and clean up
|
||||
* requests on destroy list */
|
||||
spin_unlock(&inode->i_lock);
|
||||
|
||||
nfs_destroy_unlinked_subrequests(destroy_list, head);
|
||||
|
||||
/* clean up commit list state */
|
||||
nfs_clear_request_commit(head);
|
||||
|
||||
/* still holds ref on head from nfs_page_find_head_request_locked
|
||||
* and still has lock on head from lock loop */
|
||||
return head;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -331,7 +542,7 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
|
|||
struct nfs_page *req;
|
||||
int ret = 0;
|
||||
|
||||
req = nfs_find_and_lock_request(page, nonblock);
|
||||
req = nfs_lock_and_join_requests(page, nonblock);
|
||||
if (!req)
|
||||
goto out;
|
||||
ret = PTR_ERR(req);
|
||||
|
|
Loading…
Reference in New Issue