scsi: cxlflash: Handle AFU sync failures
AFU sync operations are not currently evaluated for failure. This is acceptable for paths where there is not a dependency on the AFU being consistent with the host. Examples include link reset events and LUN cleanup operations. On paths where there is a dependency, such as a LUN open, a sync failure should be acted upon. In the event of AFU sync failures, either log or clean up as appropriate for operations that are dependent on a successful sync completion. Update documentation to reflect behavior in the event of an AFU sync failure. Signed-off-by: Uma Krishnan <ukrishn@linux.vnet.ibm.com> Acked-by: Matthew R. Ochs <mrochs@linux.vnet.ibm.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
This commit is contained in:
parent
0b09e71118
commit
c2c292f450
|
@ -257,6 +257,12 @@ DK_CXLFLASH_VLUN_RESIZE
|
||||||
operating in the virtual mode and used to program a LUN translation
|
operating in the virtual mode and used to program a LUN translation
|
||||||
table that the AFU references when provided with a resource handle.
|
table that the AFU references when provided with a resource handle.
|
||||||
|
|
||||||
|
This ioctl can return -EAGAIN if an AFU sync operation takes too long.
|
||||||
|
In addition to returning a failure to the user, cxlflash will also schedule
|
||||||
|
an asynchronous AFU reset. Should the user choose to retry the operation,
|
||||||
|
it is expected to succeed. If this ioctl fails with -EAGAIN, the user
|
||||||
|
can either retry the operation or treat it as a failure.
|
||||||
|
|
||||||
DK_CXLFLASH_RELEASE
|
DK_CXLFLASH_RELEASE
|
||||||
-------------------
|
-------------------
|
||||||
This ioctl is responsible for releasing a previously obtained
|
This ioctl is responsible for releasing a previously obtained
|
||||||
|
@ -309,6 +315,12 @@ DK_CXLFLASH_VLUN_CLONE
|
||||||
clone. This is to avoid a stale entry in the file descriptor table of the
|
clone. This is to avoid a stale entry in the file descriptor table of the
|
||||||
child process.
|
child process.
|
||||||
|
|
||||||
|
This ioctl can return -EAGAIN if an AFU sync operation takes too long.
|
||||||
|
In addition to returning a failure to the user, cxlflash will also schedule
|
||||||
|
an asynchronous AFU reset. Should the user choose to retry the operation,
|
||||||
|
it is expected to succeed. If this ioctl fails with -EAGAIN, the user
|
||||||
|
can either retry the operation or treat it as a failure.
|
||||||
|
|
||||||
DK_CXLFLASH_VERIFY
|
DK_CXLFLASH_VERIFY
|
||||||
------------------
|
------------------
|
||||||
This ioctl is used to detect various changes such as the capacity of
|
This ioctl is used to detect various changes such as the capacity of
|
||||||
|
|
|
@ -56,6 +56,19 @@ static void marshal_det_to_rele(struct dk_cxlflash_detach *detach,
|
||||||
release->context_id = detach->context_id;
|
release->context_id = detach->context_id;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* marshal_udir_to_rele() - translate udirect to release structure
|
||||||
|
* @udirect: Source structure from which to translate/copy.
|
||||||
|
* @release: Destination structure for the translate/copy.
|
||||||
|
*/
|
||||||
|
static void marshal_udir_to_rele(struct dk_cxlflash_udirect *udirect,
|
||||||
|
struct dk_cxlflash_release *release)
|
||||||
|
{
|
||||||
|
release->hdr = udirect->hdr;
|
||||||
|
release->context_id = udirect->context_id;
|
||||||
|
release->rsrc_handle = udirect->rsrc_handle;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* cxlflash_free_errpage() - frees resources associated with global error page
|
* cxlflash_free_errpage() - frees resources associated with global error page
|
||||||
*/
|
*/
|
||||||
|
@ -622,6 +635,7 @@ int _cxlflash_disk_release(struct scsi_device *sdev,
|
||||||
res_hndl_t rhndl = release->rsrc_handle;
|
res_hndl_t rhndl = release->rsrc_handle;
|
||||||
|
|
||||||
int rc = 0;
|
int rc = 0;
|
||||||
|
int rcr = 0;
|
||||||
u64 ctxid = DECODE_CTXID(release->context_id),
|
u64 ctxid = DECODE_CTXID(release->context_id),
|
||||||
rctxid = release->context_id;
|
rctxid = release->context_id;
|
||||||
|
|
||||||
|
@ -686,8 +700,12 @@ int _cxlflash_disk_release(struct scsi_device *sdev,
|
||||||
rhte_f1->dw = 0;
|
rhte_f1->dw = 0;
|
||||||
dma_wmb(); /* Make RHT entry bottom-half clearing visible */
|
dma_wmb(); /* Make RHT entry bottom-half clearing visible */
|
||||||
|
|
||||||
if (!ctxi->err_recovery_active)
|
if (!ctxi->err_recovery_active) {
|
||||||
cxlflash_afu_sync(afu, ctxid, rhndl, AFU_HW_SYNC);
|
rcr = cxlflash_afu_sync(afu, ctxid, rhndl, AFU_HW_SYNC);
|
||||||
|
if (unlikely(rcr))
|
||||||
|
dev_dbg(dev, "%s: AFU sync failed rc=%d\n",
|
||||||
|
__func__, rcr);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
WARN(1, "Unsupported LUN mode!");
|
WARN(1, "Unsupported LUN mode!");
|
||||||
|
@ -1929,6 +1947,7 @@ static int cxlflash_disk_direct_open(struct scsi_device *sdev, void *arg)
|
||||||
struct afu *afu = cfg->afu;
|
struct afu *afu = cfg->afu;
|
||||||
struct llun_info *lli = sdev->hostdata;
|
struct llun_info *lli = sdev->hostdata;
|
||||||
struct glun_info *gli = lli->parent;
|
struct glun_info *gli = lli->parent;
|
||||||
|
struct dk_cxlflash_release rel = { { 0 }, 0 };
|
||||||
|
|
||||||
struct dk_cxlflash_udirect *pphys = (struct dk_cxlflash_udirect *)arg;
|
struct dk_cxlflash_udirect *pphys = (struct dk_cxlflash_udirect *)arg;
|
||||||
|
|
||||||
|
@ -1970,13 +1989,18 @@ static int cxlflash_disk_direct_open(struct scsi_device *sdev, void *arg)
|
||||||
rsrc_handle = (rhte - ctxi->rht_start);
|
rsrc_handle = (rhte - ctxi->rht_start);
|
||||||
|
|
||||||
rht_format1(rhte, lli->lun_id[sdev->channel], ctxi->rht_perms, port);
|
rht_format1(rhte, lli->lun_id[sdev->channel], ctxi->rht_perms, port);
|
||||||
cxlflash_afu_sync(afu, ctxid, rsrc_handle, AFU_LW_SYNC);
|
|
||||||
|
|
||||||
last_lba = gli->max_lba;
|
last_lba = gli->max_lba;
|
||||||
pphys->hdr.return_flags = 0;
|
pphys->hdr.return_flags = 0;
|
||||||
pphys->last_lba = last_lba;
|
pphys->last_lba = last_lba;
|
||||||
pphys->rsrc_handle = rsrc_handle;
|
pphys->rsrc_handle = rsrc_handle;
|
||||||
|
|
||||||
|
rc = cxlflash_afu_sync(afu, ctxid, rsrc_handle, AFU_LW_SYNC);
|
||||||
|
if (unlikely(rc)) {
|
||||||
|
dev_dbg(dev, "%s: AFU sync failed rc=%d\n", __func__, rc);
|
||||||
|
goto err2;
|
||||||
|
}
|
||||||
|
|
||||||
out:
|
out:
|
||||||
if (likely(ctxi))
|
if (likely(ctxi))
|
||||||
put_context(ctxi);
|
put_context(ctxi);
|
||||||
|
@ -1984,6 +2008,10 @@ out:
|
||||||
__func__, rsrc_handle, rc, last_lba);
|
__func__, rsrc_handle, rc, last_lba);
|
||||||
return rc;
|
return rc;
|
||||||
|
|
||||||
|
err2:
|
||||||
|
marshal_udir_to_rele(pphys, &rel);
|
||||||
|
_cxlflash_disk_release(sdev, ctxi, &rel);
|
||||||
|
goto out;
|
||||||
err1:
|
err1:
|
||||||
cxlflash_lun_detach(gli);
|
cxlflash_lun_detach(gli);
|
||||||
goto out;
|
goto out;
|
||||||
|
|
|
@ -594,7 +594,9 @@ static int grow_lxt(struct afu *afu,
|
||||||
rhte->lxt_cnt = my_new_size;
|
rhte->lxt_cnt = my_new_size;
|
||||||
dma_wmb(); /* Make RHT entry's LXT table size update visible */
|
dma_wmb(); /* Make RHT entry's LXT table size update visible */
|
||||||
|
|
||||||
cxlflash_afu_sync(afu, ctxid, rhndl, AFU_LW_SYNC);
|
rc = cxlflash_afu_sync(afu, ctxid, rhndl, AFU_LW_SYNC);
|
||||||
|
if (unlikely(rc))
|
||||||
|
rc = -EAGAIN;
|
||||||
|
|
||||||
/* free old lxt if reallocated */
|
/* free old lxt if reallocated */
|
||||||
if (lxt != lxt_old)
|
if (lxt != lxt_old)
|
||||||
|
@ -673,8 +675,11 @@ static int shrink_lxt(struct afu *afu,
|
||||||
rhte->lxt_start = lxt;
|
rhte->lxt_start = lxt;
|
||||||
dma_wmb(); /* Make RHT entry's LXT table update visible */
|
dma_wmb(); /* Make RHT entry's LXT table update visible */
|
||||||
|
|
||||||
if (needs_sync)
|
if (needs_sync) {
|
||||||
cxlflash_afu_sync(afu, ctxid, rhndl, AFU_HW_SYNC);
|
rc = cxlflash_afu_sync(afu, ctxid, rhndl, AFU_HW_SYNC);
|
||||||
|
if (unlikely(rc))
|
||||||
|
rc = -EAGAIN;
|
||||||
|
}
|
||||||
|
|
||||||
if (needs_ws) {
|
if (needs_ws) {
|
||||||
/*
|
/*
|
||||||
|
@ -792,6 +797,21 @@ int _cxlflash_vlun_resize(struct scsi_device *sdev,
|
||||||
rc = grow_lxt(afu, sdev, ctxid, rhndl, rhte, &new_size);
|
rc = grow_lxt(afu, sdev, ctxid, rhndl, rhte, &new_size);
|
||||||
else if (new_size < rhte->lxt_cnt)
|
else if (new_size < rhte->lxt_cnt)
|
||||||
rc = shrink_lxt(afu, sdev, rhndl, rhte, ctxi, &new_size);
|
rc = shrink_lxt(afu, sdev, rhndl, rhte, ctxi, &new_size);
|
||||||
|
else {
|
||||||
|
/*
|
||||||
|
* Rare case where there is already sufficient space, just
|
||||||
|
* need to perform a translation sync with the AFU. This
|
||||||
|
* scenario likely follows a previous sync failure during
|
||||||
|
* a resize operation. Accordingly, perform the heavyweight
|
||||||
|
* form of translation sync as it is unknown which type of
|
||||||
|
* resize failed previously.
|
||||||
|
*/
|
||||||
|
rc = cxlflash_afu_sync(afu, ctxid, rhndl, AFU_HW_SYNC);
|
||||||
|
if (unlikely(rc)) {
|
||||||
|
rc = -EAGAIN;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
resize->hdr.return_flags = 0;
|
resize->hdr.return_flags = 0;
|
||||||
resize->last_lba = (new_size * MC_CHUNK_SIZE * gli->blk_len);
|
resize->last_lba = (new_size * MC_CHUNK_SIZE * gli->blk_len);
|
||||||
|
@ -1084,10 +1104,13 @@ static int clone_lxt(struct afu *afu,
|
||||||
{
|
{
|
||||||
struct cxlflash_cfg *cfg = afu->parent;
|
struct cxlflash_cfg *cfg = afu->parent;
|
||||||
struct device *dev = &cfg->dev->dev;
|
struct device *dev = &cfg->dev->dev;
|
||||||
struct sisl_lxt_entry *lxt;
|
struct sisl_lxt_entry *lxt = NULL;
|
||||||
|
bool locked = false;
|
||||||
u32 ngrps;
|
u32 ngrps;
|
||||||
u64 aun; /* chunk# allocated by block allocator */
|
u64 aun; /* chunk# allocated by block allocator */
|
||||||
int i, j;
|
int j;
|
||||||
|
int i = 0;
|
||||||
|
int rc = 0;
|
||||||
|
|
||||||
ngrps = LXT_NUM_GROUPS(rhte_src->lxt_cnt);
|
ngrps = LXT_NUM_GROUPS(rhte_src->lxt_cnt);
|
||||||
|
|
||||||
|
@ -1095,33 +1118,29 @@ static int clone_lxt(struct afu *afu,
|
||||||
/* allocate new LXTs for clone */
|
/* allocate new LXTs for clone */
|
||||||
lxt = kzalloc((sizeof(*lxt) * LXT_GROUP_SIZE * ngrps),
|
lxt = kzalloc((sizeof(*lxt) * LXT_GROUP_SIZE * ngrps),
|
||||||
GFP_KERNEL);
|
GFP_KERNEL);
|
||||||
if (unlikely(!lxt))
|
if (unlikely(!lxt)) {
|
||||||
return -ENOMEM;
|
rc = -ENOMEM;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
/* copy over */
|
/* copy over */
|
||||||
memcpy(lxt, rhte_src->lxt_start,
|
memcpy(lxt, rhte_src->lxt_start,
|
||||||
(sizeof(*lxt) * rhte_src->lxt_cnt));
|
(sizeof(*lxt) * rhte_src->lxt_cnt));
|
||||||
|
|
||||||
/* clone the LBAs in block allocator via ref_cnt */
|
/* clone the LBAs in block allocator via ref_cnt, note that the
|
||||||
|
* block allocator mutex must be held until it is established
|
||||||
|
* that this routine will complete without the need for a
|
||||||
|
* cleanup.
|
||||||
|
*/
|
||||||
mutex_lock(&blka->mutex);
|
mutex_lock(&blka->mutex);
|
||||||
|
locked = true;
|
||||||
for (i = 0; i < rhte_src->lxt_cnt; i++) {
|
for (i = 0; i < rhte_src->lxt_cnt; i++) {
|
||||||
aun = (lxt[i].rlba_base >> MC_CHUNK_SHIFT);
|
aun = (lxt[i].rlba_base >> MC_CHUNK_SHIFT);
|
||||||
if (ba_clone(&blka->ba_lun, aun) == -1ULL) {
|
if (ba_clone(&blka->ba_lun, aun) == -1ULL) {
|
||||||
/* free the clones already made */
|
rc = -EIO;
|
||||||
for (j = 0; j < i; j++) {
|
goto err;
|
||||||
aun = (lxt[j].rlba_base >>
|
|
||||||
MC_CHUNK_SHIFT);
|
|
||||||
ba_free(&blka->ba_lun, aun);
|
|
||||||
}
|
|
||||||
|
|
||||||
mutex_unlock(&blka->mutex);
|
|
||||||
kfree(lxt);
|
|
||||||
return -EIO;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
mutex_unlock(&blka->mutex);
|
|
||||||
} else {
|
|
||||||
lxt = NULL;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1136,10 +1155,31 @@ static int clone_lxt(struct afu *afu,
|
||||||
rhte->lxt_cnt = rhte_src->lxt_cnt;
|
rhte->lxt_cnt = rhte_src->lxt_cnt;
|
||||||
dma_wmb(); /* Make RHT entry's LXT table size update visible */
|
dma_wmb(); /* Make RHT entry's LXT table size update visible */
|
||||||
|
|
||||||
cxlflash_afu_sync(afu, ctxid, rhndl, AFU_LW_SYNC);
|
rc = cxlflash_afu_sync(afu, ctxid, rhndl, AFU_LW_SYNC);
|
||||||
|
if (unlikely(rc)) {
|
||||||
|
rc = -EAGAIN;
|
||||||
|
goto err2;
|
||||||
|
}
|
||||||
|
|
||||||
dev_dbg(dev, "%s: returning\n", __func__);
|
out:
|
||||||
return 0;
|
if (locked)
|
||||||
|
mutex_unlock(&blka->mutex);
|
||||||
|
dev_dbg(dev, "%s: returning rc=%d\n", __func__, rc);
|
||||||
|
return rc;
|
||||||
|
err2:
|
||||||
|
/* Reset the RHTE */
|
||||||
|
rhte->lxt_cnt = 0;
|
||||||
|
dma_wmb();
|
||||||
|
rhte->lxt_start = NULL;
|
||||||
|
dma_wmb();
|
||||||
|
err:
|
||||||
|
/* free the clones already made */
|
||||||
|
for (j = 0; j < i; j++) {
|
||||||
|
aun = (lxt[j].rlba_base >> MC_CHUNK_SHIFT);
|
||||||
|
ba_free(&blka->ba_lun, aun);
|
||||||
|
}
|
||||||
|
kfree(lxt);
|
||||||
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
Loading…
Reference in New Issue