pnfs/blocklayout: handle transient devices
pNFS block/SCSI layouts should gracefully handle cases where block devices are not available when a layout is retrieved, or where the block devices are removed while the client holds a layout. While setting up a layout segment, keep a record of an unavailable or unparsable block device in the cache with a flag so that subsequent layouts do not spam the server with GETDEVICEINFO. We can reuse the current NFS_DEVICEID_UNAVAILABLE handling with one variation: instead of reusing the device, we will discard it and send a fresh GETDEVICEINFO after the timeout, since the lookup and validation of the device occur within the GETDEVICEINFO response handling. A lookup of a layout segment that references an unavailable device will return a segment with the NFS_LSEG_UNAVAILABLE flag set. This will allow the pgio layer to mark the layout with the appropriate fail bit, which forces subsequent IO to the MDS and prevents spamming the server with LAYOUTGET and LAYOUTRETURN. Finally, when IO to a block device fails, look up the block device(s) referenced by the pgio header and mark them as unavailable. Signed-off-by: Benjamin Coddington <bcodding@redhat.com> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
This commit is contained in:
parent
d78471d32b
commit
b3dce6a2f0
|
@ -184,6 +184,29 @@ retry:
|
|||
return bio;
|
||||
}
|
||||
|
||||
/*
 * Flag every block device backing the byte range of a failed I/O as
 * unavailable.
 *
 * @header: pgio header of the failed request; its layout segment and
 *          args.offset/args.count select the extents that are visited.
 * @rw:     forwarded unchanged to ext_tree_lookup().
 *
 * The walk stops as soon as no extent is found for the current sector, or
 * once the whole byte range has been covered.
 */
static void bl_mark_devices_unavailable(struct nfs_pgio_header *header, bool rw)
{
	struct pnfs_block_layout *bl = BLK_LSEG2EXT(header->lseg);
	struct pnfs_block_extent be;
	sector_t isect = header->args.offset >> SECTOR_SHIFT;
	size_t bytes_left = header->args.count;

	/* Widen the range so that it starts on the sector boundary at isect. */
	bytes_left += header->args.offset - (isect << SECTOR_SHIFT);

	while (bytes_left != 0) {
		sector_t nr_sectors;

		if (!ext_tree_lookup(bl, isect, &be, rw))
			break;

		/* Sectors from isect up to the end of the extent just found. */
		nr_sectors = be.be_length - (isect - be.be_f_offset);
		nfs4_mark_deviceid_unavailable(be.be_device);
		isect += nr_sectors;

		/* This extent covers the remainder of the range: finished. */
		if (bytes_left <= nr_sectors << SECTOR_SHIFT)
			break;
		bytes_left -= nr_sectors << SECTOR_SHIFT;
	}
}
|
||||
|
||||
static void bl_end_io_read(struct bio *bio)
|
||||
{
|
||||
struct parallel_io *par = bio->bi_private;
|
||||
|
@ -194,6 +217,7 @@ static void bl_end_io_read(struct bio *bio)
|
|||
if (!header->pnfs_error)
|
||||
header->pnfs_error = -EIO;
|
||||
pnfs_set_lo_fail(header->lseg);
|
||||
bl_mark_devices_unavailable(header, false);
|
||||
}
|
||||
|
||||
bio_put(bio);
|
||||
|
@ -323,6 +347,7 @@ static void bl_end_io_write(struct bio *bio)
|
|||
if (!header->pnfs_error)
|
||||
header->pnfs_error = -EIO;
|
||||
pnfs_set_lo_fail(header->lseg);
|
||||
bl_mark_devices_unavailable(header, true);
|
||||
}
|
||||
bio_put(bio);
|
||||
put_parallel(par);
|
||||
|
@ -552,6 +577,31 @@ static int decode_sector_number(__be32 **rp, sector_t *sp)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static struct nfs4_deviceid_node *
|
||||
bl_find_get_deviceid(struct nfs_server *server,
|
||||
const struct nfs4_deviceid *id, struct rpc_cred *cred,
|
||||
gfp_t gfp_mask)
|
||||
{
|
||||
struct nfs4_deviceid_node *node;
|
||||
unsigned long start, end;
|
||||
|
||||
retry:
|
||||
node = nfs4_find_get_deviceid(server, id, cred, gfp_mask);
|
||||
if (!node)
|
||||
return ERR_PTR(-ENODEV);
|
||||
|
||||
if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags) == 0)
|
||||
return node;
|
||||
|
||||
end = jiffies;
|
||||
start = end - PNFS_DEVICE_RETRY_TIMEOUT;
|
||||
if (!time_in_range(node->timestamp_unavailable, start, end)) {
|
||||
nfs4_delete_deviceid(node->ld, node->nfs_client, id);
|
||||
goto retry;
|
||||
}
|
||||
return ERR_PTR(-ENODEV);
|
||||
}
|
||||
|
||||
static int
|
||||
bl_alloc_extent(struct xdr_stream *xdr, struct pnfs_layout_hdr *lo,
|
||||
struct layout_verification *lv, struct list_head *extents,
|
||||
|
@ -573,16 +623,18 @@ bl_alloc_extent(struct xdr_stream *xdr, struct pnfs_layout_hdr *lo,
|
|||
memcpy(&id, p, NFS4_DEVICEID4_SIZE);
|
||||
p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
|
||||
|
||||
error = -EIO;
|
||||
be->be_device = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), &id,
|
||||
be->be_device = bl_find_get_deviceid(NFS_SERVER(lo->plh_inode), &id,
|
||||
lo->plh_lc_cred, gfp_mask);
|
||||
if (!be->be_device)
|
||||
if (IS_ERR(be->be_device)) {
|
||||
error = PTR_ERR(be->be_device);
|
||||
goto out_free_be;
|
||||
}
|
||||
|
||||
/*
|
||||
* The next three values are read in as bytes, but stored in the
|
||||
* extent structure in 512-byte granularity.
|
||||
*/
|
||||
error = -EIO;
|
||||
if (decode_sector_number(&p, &be->be_f_offset) < 0)
|
||||
goto out_put_deviceid;
|
||||
if (decode_sector_number(&p, &be->be_length) < 0)
|
||||
|
@ -692,11 +744,16 @@ out_free_scratch:
|
|||
__free_page(scratch);
|
||||
out:
|
||||
dprintk("%s returns %d\n", __func__, status);
|
||||
if (status) {
|
||||
switch (status) {
|
||||
case -ENODEV:
|
||||
/* Our extent block devices are unavailable */
|
||||
set_bit(NFS_LSEG_UNAVAILABLE, &lseg->pls_flags);
|
||||
case 0:
|
||||
return lseg;
|
||||
default:
|
||||
kfree(lseg);
|
||||
return ERR_PTR(status);
|
||||
}
|
||||
return lseg;
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -798,6 +855,13 @@ bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
|
|||
}
|
||||
|
||||
pnfs_generic_pg_init_read(pgio, req);
|
||||
|
||||
if (pgio->pg_lseg &&
|
||||
test_bit(NFS_LSEG_UNAVAILABLE, &pgio->pg_lseg->pls_flags)) {
|
||||
pnfs_error_mark_layout_for_return(pgio->pg_inode, pgio->pg_lseg);
|
||||
pnfs_set_lo_fail(pgio->pg_lseg);
|
||||
nfs_pageio_reset_read_mds(pgio);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -853,6 +917,14 @@ bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
|
|||
wb_size = nfs_dreq_bytes_left(pgio->pg_dreq);
|
||||
|
||||
pnfs_generic_pg_init_write(pgio, req, wb_size);
|
||||
|
||||
if (pgio->pg_lseg &&
|
||||
test_bit(NFS_LSEG_UNAVAILABLE, &pgio->pg_lseg->pls_flags)) {
|
||||
|
||||
pnfs_error_mark_layout_for_return(pgio->pg_inode, pgio->pg_lseg);
|
||||
pnfs_set_lo_fail(pgio->pg_lseg);
|
||||
nfs_pageio_reset_write_mds(pgio);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -533,14 +533,11 @@ bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
|
|||
goto out_free_volumes;
|
||||
|
||||
ret = bl_parse_deviceid(server, top, volumes, nr_volumes - 1, gfp_mask);
|
||||
if (ret) {
|
||||
bl_free_device(top);
|
||||
kfree(top);
|
||||
goto out_free_volumes;
|
||||
}
|
||||
|
||||
node = &top->node;
|
||||
nfs4_init_deviceid_node(node, server, &pdev->dev_id);
|
||||
if (ret)
|
||||
nfs4_mark_deviceid_unavailable(node);
|
||||
|
||||
out_free_volumes:
|
||||
kfree(volumes);
|
||||
|
|
|
@ -655,7 +655,7 @@ pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
|
|||
return 0;
|
||||
list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
|
||||
if (pnfs_match_lseg_recall(lseg, recall_range, seq)) {
|
||||
dprintk("%s: freeing lseg %p iomode %d seq %u"
|
||||
dprintk("%s: freeing lseg %p iomode %d seq %u "
|
||||
"offset %llu length %llu\n", __func__,
|
||||
lseg, lseg->pls_range.iomode, lseg->pls_seq,
|
||||
lseg->pls_range.offset, lseg->pls_range.length);
|
||||
|
|
|
@ -40,6 +40,7 @@ enum {
|
|||
NFS_LSEG_ROC, /* roc bit received from server */
|
||||
NFS_LSEG_LAYOUTCOMMIT, /* layoutcommit bit set for layoutcommit */
|
||||
NFS_LSEG_LAYOUTRETURN, /* layoutreturn bit set for layoutreturn */
|
||||
NFS_LSEG_UNAVAILABLE, /* unavailable bit set for temporary problem */
|
||||
};
|
||||
|
||||
/* Individual ip address */
|
||||
|
@ -86,6 +87,7 @@ enum pnfs_try_status {
|
|||
*/
|
||||
#define NFS4_DEF_DS_TIMEO 600 /* in tenths of a second */
|
||||
#define NFS4_DEF_DS_RETRANS 5
|
||||
#define PNFS_DEVICE_RETRY_TIMEOUT (120*HZ)
|
||||
|
||||
/* error codes for internal use */
|
||||
#define NFS4ERR_RESET_TO_MDS 12001
|
||||
|
|
|
@ -43,7 +43,6 @@
|
|||
#define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS)
|
||||
#define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1)
|
||||
|
||||
#define PNFS_DEVICE_RETRY_TIMEOUT (120*HZ)
|
||||
|
||||
static struct hlist_head nfs4_deviceid_cache[NFS4_DEVICE_ID_HASH_SIZE];
|
||||
static DEFINE_SPINLOCK(nfs4_deviceid_lock);
|
||||
|
|
Loading…
Reference in New Issue