s390/dasd: fix unusable device after safe offline processing
The safe offline processing needs, as well as the normal offline processing, to be locked against multiple parallel executions. But it should be able to be overtaken by a normal offline processing to make sure that the device does not wait forever for outstanding I/O if the user wants to. Unfortunately the parallel processing of safe offline and normal offline might lead to a race situation where both threads report successful execution to the CIO layer which in turn tries to deregister the kobject of the device twice. This leads to a refcount_t: underflow; use-after-free. error and the device is not able to be set online again afterwards without a reboot. Correct the locking of the safe offline processing by doing the following: - Use the cdev lock to secure all set and test operations to the device flags. - Two safe offline processes are locked against each other using the DASD_FLAG_SAFE_OFFLINE and DASD_FLAG_SAFE_OFFLINE_RUNNING device flags. The differentiation between offline triggered and offline running is needed since the normal offline attribute is owned by CIO and we have to pass over control in between. - The dasd_generic_set_offline process handles the offline processing. It is locked against parallel execution using the DASD_FLAG_OFFLINE. - Only a running safe offline should be able to be overtaken by a single normal offline. This is ensured by clearing the DASD_FLAG_SAFE_OFFLINE_RUNNING flag when a normal offline overtakes. So this can only happen ones. - The safe offline just aborts in this case doing nothing and the normal offline processing finishes as usual. Signed-off-by: Stefan Haberland <sth@linux.vnet.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
This commit is contained in:
parent
b487a914f8
commit
2757fe1d8e
|
@ -3562,57 +3562,69 @@ int dasd_generic_set_offline(struct ccw_device *cdev)
|
|||
else
|
||||
pr_warn("%s: The DASD cannot be set offline while it is in use\n",
|
||||
dev_name(&cdev->dev));
|
||||
clear_bit(DASD_FLAG_OFFLINE, &device->flags);
|
||||
goto out_busy;
|
||||
rc = -EBUSY;
|
||||
goto out_err;
|
||||
}
|
||||
}
|
||||
|
||||
if (test_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) {
|
||||
/*
|
||||
* safe offline already running
|
||||
* could only be called by normal offline so safe_offline flag
|
||||
* needs to be removed to run normal offline and kill all I/O
|
||||
*/
|
||||
if (test_and_set_bit(DASD_FLAG_OFFLINE, &device->flags))
|
||||
/* Already doing normal offline processing */
|
||||
goto out_busy;
|
||||
else
|
||||
clear_bit(DASD_FLAG_SAFE_OFFLINE, &device->flags);
|
||||
} else {
|
||||
if (test_bit(DASD_FLAG_OFFLINE, &device->flags))
|
||||
/* Already doing offline processing */
|
||||
goto out_busy;
|
||||
/*
|
||||
* Test if the offline processing is already running and exit if so.
|
||||
* If a safe offline is being processed this could only be a normal
|
||||
* offline that should be able to overtake the safe offline and
|
||||
* cancel any I/O we do not want to wait for any longer
|
||||
*/
|
||||
if (test_bit(DASD_FLAG_OFFLINE, &device->flags)) {
|
||||
if (test_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) {
|
||||
clear_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING,
|
||||
&device->flags);
|
||||
} else {
|
||||
rc = -EBUSY;
|
||||
goto out_err;
|
||||
}
|
||||
}
|
||||
|
||||
set_bit(DASD_FLAG_OFFLINE, &device->flags);
|
||||
spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
|
||||
|
||||
/*
|
||||
* if safe_offline called set safe_offline_running flag and
|
||||
* if safe_offline is called set safe_offline_running flag and
|
||||
* clear safe_offline so that a call to normal offline
|
||||
* can overrun safe_offline processing
|
||||
*/
|
||||
if (test_and_clear_bit(DASD_FLAG_SAFE_OFFLINE, &device->flags) &&
|
||||
!test_and_set_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) {
|
||||
/* need to unlock here to wait for outstanding I/O */
|
||||
spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
|
||||
/*
|
||||
* If we want to set the device safe offline all IO operations
|
||||
* should be finished before continuing the offline process
|
||||
* so sync bdev first and then wait for our queues to become
|
||||
* empty
|
||||
*/
|
||||
/* sync blockdev and partitions */
|
||||
if (device->block) {
|
||||
rc = fsync_bdev(device->block->bdev);
|
||||
if (rc != 0)
|
||||
goto interrupted;
|
||||
}
|
||||
/* schedule device tasklet and wait for completion */
|
||||
dasd_schedule_device_bh(device);
|
||||
rc = wait_event_interruptible(shutdown_waitq,
|
||||
_wait_for_empty_queues(device));
|
||||
if (rc != 0)
|
||||
goto interrupted;
|
||||
|
||||
/*
|
||||
* check if a normal offline process overtook the offline
|
||||
* processing in this case simply do nothing beside returning
|
||||
* that we got interrupted
|
||||
* otherwise mark safe offline as not running any longer and
|
||||
* continue with normal offline
|
||||
*/
|
||||
spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
|
||||
if (!test_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) {
|
||||
rc = -ERESTARTSYS;
|
||||
goto out_err;
|
||||
}
|
||||
clear_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags);
|
||||
}
|
||||
spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
|
||||
|
||||
dasd_set_target_state(device, DASD_STATE_NEW);
|
||||
/* dasd_delete_device destroys the device reference. */
|
||||
|
@ -3624,22 +3636,18 @@ int dasd_generic_set_offline(struct ccw_device *cdev)
|
|||
*/
|
||||
if (block)
|
||||
dasd_free_block(block);
|
||||
|
||||
return 0;
|
||||
|
||||
interrupted:
|
||||
/* interrupted by signal */
|
||||
clear_bit(DASD_FLAG_SAFE_OFFLINE, &device->flags);
|
||||
spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
|
||||
clear_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags);
|
||||
clear_bit(DASD_FLAG_OFFLINE, &device->flags);
|
||||
dasd_put_device(device);
|
||||
|
||||
return rc;
|
||||
|
||||
out_busy:
|
||||
out_err:
|
||||
dasd_put_device(device);
|
||||
spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
|
||||
|
||||
return -EBUSY;
|
||||
return rc;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dasd_generic_set_offline);
|
||||
|
||||
|
|
|
@ -950,11 +950,14 @@ dasd_safe_offline_store(struct device *dev, struct device_attribute *attr,
|
|||
{
|
||||
struct ccw_device *cdev = to_ccwdev(dev);
|
||||
struct dasd_device *device;
|
||||
unsigned long flags;
|
||||
int rc;
|
||||
|
||||
device = dasd_device_from_cdev(cdev);
|
||||
spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
|
||||
device = dasd_device_from_cdev_locked(cdev);
|
||||
if (IS_ERR(device)) {
|
||||
rc = PTR_ERR(device);
|
||||
spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
|
||||
goto out;
|
||||
}
|
||||
|
||||
|
@ -962,12 +965,14 @@ dasd_safe_offline_store(struct device *dev, struct device_attribute *attr,
|
|||
test_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) {
|
||||
/* Already doing offline processing */
|
||||
dasd_put_device(device);
|
||||
spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
|
||||
rc = -EBUSY;
|
||||
goto out;
|
||||
}
|
||||
|
||||
set_bit(DASD_FLAG_SAFE_OFFLINE, &device->flags);
|
||||
dasd_put_device(device);
|
||||
spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
|
||||
|
||||
rc = ccw_device_set_offline(cdev);
|
||||
|
||||
|
|
Loading…
Reference in New Issue