scsi: sd_zbc: Avoid that resetting a zone fails sporadically

SCSI scanning occurs asynchronously: sd_probe_async() calls
sd_revalidate_disk(), which in turn calls sd_zbc_read_zones().  As a result,
sd_zbc_read_zones() can run concurrently with blkdev_report_zones() and/or
blkdev_reset_zones().  That can cause these functions to fail with -EIO
because sd_zbc_read_zones(), for example, sets q->nr_zones to zero before
restoring it to the actual value, even if no drive characteristics have
changed.  Prevent this by making the following changes:

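- Protect the code that updates zone information by freezing the request
  queue with blk_mq_freeze_queue() and blk_mq_unfreeze_queue().  Readers of
  that information are expected to use blk_queue_enter() and
  blk_queue_exit().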
- Modify sd_zbc_setup_seq_zones_bitmap() and sd_zbc_setup() such that
  these functions do not modify struct scsi_disk before all zone
  information has been obtained.

Note: since commit 055f6e18e0 ("block: Make q_usage_counter also track
legacy requests"; kernel v4.15) the request queue freezing mechanism also
affects legacy request queues.

Fixes: 89d9475610 ("sd: Implement support for ZBC devices")
Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Damien Le Moal <damien.lemoal@wdc.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.com>
Cc: stable@vger.kernel.org # v4.16
Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
This commit is contained in:
Bart Van Assche 2018-04-16 18:04:41 -07:00 committed by Martin K. Petersen
parent 505aa4b6a8
commit ccce20fc79
2 changed files with 85 additions and 56 deletions

View File

@ -400,8 +400,10 @@ static int sd_zbc_check_capacity(struct scsi_disk *sdkp, unsigned char *buf)
* *
* Check that all zones of the device are equal. The last zone can however * Check that all zones of the device are equal. The last zone can however
* be smaller. The zone size must also be a power of two number of LBAs. * be smaller. The zone size must also be a power of two number of LBAs.
*
* Returns the zone size in logical blocks upon success or an error code upon failure.
*/ */
static int sd_zbc_check_zone_size(struct scsi_disk *sdkp) static s64 sd_zbc_check_zone_size(struct scsi_disk *sdkp)
{ {
u64 zone_blocks = 0; u64 zone_blocks = 0;
sector_t block = 0; sector_t block = 0;
@ -412,8 +414,6 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
int ret; int ret;
u8 same; u8 same;
sdkp->zone_blocks = 0;
/* Get a buffer */ /* Get a buffer */
buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL); buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL);
if (!buf) if (!buf)
@ -445,16 +445,17 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
/* Parse zone descriptors */ /* Parse zone descriptors */
while (rec < buf + buf_len) { while (rec < buf + buf_len) {
zone_blocks = get_unaligned_be64(&rec[8]); u64 this_zone_blocks = get_unaligned_be64(&rec[8]);
if (sdkp->zone_blocks == 0) {
sdkp->zone_blocks = zone_blocks; if (zone_blocks == 0) {
} else if (zone_blocks != sdkp->zone_blocks && zone_blocks = this_zone_blocks;
(block + zone_blocks < sdkp->capacity } else if (this_zone_blocks != zone_blocks &&
|| zone_blocks > sdkp->zone_blocks)) { (block + this_zone_blocks < sdkp->capacity
zone_blocks = 0; || this_zone_blocks > zone_blocks)) {
this_zone_blocks = 0;
goto out; goto out;
} }
block += zone_blocks; block += this_zone_blocks;
rec += 64; rec += 64;
} }
@ -467,8 +468,6 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
} while (block < sdkp->capacity); } while (block < sdkp->capacity);
zone_blocks = sdkp->zone_blocks;
out: out:
if (!zone_blocks) { if (!zone_blocks) {
if (sdkp->first_scan) if (sdkp->first_scan)
@ -488,8 +487,7 @@ out:
"Zone size too large\n"); "Zone size too large\n");
ret = -ENODEV; ret = -ENODEV;
} else { } else {
sdkp->zone_blocks = zone_blocks; ret = zone_blocks;
sdkp->zone_shift = ilog2(zone_blocks);
} }
out_free: out_free:
@ -500,15 +498,14 @@ out_free:
/** /**
* sd_zbc_alloc_zone_bitmap - Allocate a zone bitmap (one bit per zone). * sd_zbc_alloc_zone_bitmap - Allocate a zone bitmap (one bit per zone).
* @sdkp: The disk of the bitmap * @nr_zones: Number of zones to allocate space for.
* @numa_node: NUMA node to allocate the memory from.
*/ */
static inline unsigned long *sd_zbc_alloc_zone_bitmap(struct scsi_disk *sdkp) static inline unsigned long *
sd_zbc_alloc_zone_bitmap(u32 nr_zones, int numa_node)
{ {
struct request_queue *q = sdkp->disk->queue; return kzalloc_node(BITS_TO_LONGS(nr_zones) * sizeof(unsigned long),
GFP_KERNEL, numa_node);
return kzalloc_node(BITS_TO_LONGS(sdkp->nr_zones)
* sizeof(unsigned long),
GFP_KERNEL, q->node);
} }
/** /**
@ -516,6 +513,7 @@ static inline unsigned long *sd_zbc_alloc_zone_bitmap(struct scsi_disk *sdkp)
* @sdkp: disk used * @sdkp: disk used
* @buf: report reply buffer * @buf: report reply buffer
* @buflen: length of @buf * @buflen: length of @buf
* @zone_shift: logarithm base 2 of the number of blocks in a zone
* @seq_zones_bitmap: bitmap of sequential zones to set * @seq_zones_bitmap: bitmap of sequential zones to set
* *
* Parse reported zone descriptors in @buf to identify sequential zones and * Parse reported zone descriptors in @buf to identify sequential zones and
@ -525,7 +523,7 @@ static inline unsigned long *sd_zbc_alloc_zone_bitmap(struct scsi_disk *sdkp)
* Return the LBA after the last zone reported. * Return the LBA after the last zone reported.
*/ */
static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf, static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf,
unsigned int buflen, unsigned int buflen, u32 zone_shift,
unsigned long *seq_zones_bitmap) unsigned long *seq_zones_bitmap)
{ {
sector_t lba, next_lba = sdkp->capacity; sector_t lba, next_lba = sdkp->capacity;
@ -544,7 +542,7 @@ static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf,
if (type != ZBC_ZONE_TYPE_CONV && if (type != ZBC_ZONE_TYPE_CONV &&
cond != ZBC_ZONE_COND_READONLY && cond != ZBC_ZONE_COND_READONLY &&
cond != ZBC_ZONE_COND_OFFLINE) cond != ZBC_ZONE_COND_OFFLINE)
set_bit(lba >> sdkp->zone_shift, seq_zones_bitmap); set_bit(lba >> zone_shift, seq_zones_bitmap);
next_lba = lba + get_unaligned_be64(&rec[8]); next_lba = lba + get_unaligned_be64(&rec[8]);
rec += 64; rec += 64;
} }
@ -553,12 +551,16 @@ static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf,
} }
/** /**
* sd_zbc_setup_seq_zones_bitmap - Initialize the disk seq zone bitmap. * sd_zbc_setup_seq_zones_bitmap - Initialize a seq zone bitmap.
* @sdkp: target disk * @sdkp: target disk
* @zone_shift: logarithm base 2 of the number of blocks in a zone
* @nr_zones: number of zones to set up a seq zone bitmap for
* *
* Allocate a zone bitmap and initialize it by identifying sequential zones. * Allocate a zone bitmap and initialize it by identifying sequential zones.
*/ */
static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp) static unsigned long *
sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp, u32 zone_shift,
u32 nr_zones)
{ {
struct request_queue *q = sdkp->disk->queue; struct request_queue *q = sdkp->disk->queue;
unsigned long *seq_zones_bitmap; unsigned long *seq_zones_bitmap;
@ -566,9 +568,9 @@ static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp)
unsigned char *buf; unsigned char *buf;
int ret = -ENOMEM; int ret = -ENOMEM;
seq_zones_bitmap = sd_zbc_alloc_zone_bitmap(sdkp); seq_zones_bitmap = sd_zbc_alloc_zone_bitmap(nr_zones, q->node);
if (!seq_zones_bitmap) if (!seq_zones_bitmap)
return -ENOMEM; return ERR_PTR(-ENOMEM);
buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL); buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL);
if (!buf) if (!buf)
@ -579,7 +581,7 @@ static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp)
if (ret) if (ret)
goto out; goto out;
lba = sd_zbc_get_seq_zones(sdkp, buf, SD_ZBC_BUF_SIZE, lba = sd_zbc_get_seq_zones(sdkp, buf, SD_ZBC_BUF_SIZE,
seq_zones_bitmap); zone_shift, seq_zones_bitmap);
} }
if (lba != sdkp->capacity) { if (lba != sdkp->capacity) {
@ -591,12 +593,9 @@ out:
kfree(buf); kfree(buf);
if (ret) { if (ret) {
kfree(seq_zones_bitmap); kfree(seq_zones_bitmap);
return ret; return ERR_PTR(ret);
} }
return seq_zones_bitmap;
q->seq_zones_bitmap = seq_zones_bitmap;
return 0;
} }
static void sd_zbc_cleanup(struct scsi_disk *sdkp) static void sd_zbc_cleanup(struct scsi_disk *sdkp)
@ -612,44 +611,64 @@ static void sd_zbc_cleanup(struct scsi_disk *sdkp)
q->nr_zones = 0; q->nr_zones = 0;
} }
static int sd_zbc_setup(struct scsi_disk *sdkp) static int sd_zbc_setup(struct scsi_disk *sdkp, u32 zone_blocks)
{ {
struct request_queue *q = sdkp->disk->queue; struct request_queue *q = sdkp->disk->queue;
u32 zone_shift = ilog2(zone_blocks);
u32 nr_zones;
int ret; int ret;
/* READ16/WRITE16 is mandatory for ZBC disks */
sdkp->device->use_16_for_rw = 1;
sdkp->device->use_10_for_rw = 0;
/* chunk_sectors indicates the zone size */ /* chunk_sectors indicates the zone size */
blk_queue_chunk_sectors(sdkp->disk->queue, blk_queue_chunk_sectors(q,
logical_to_sectors(sdkp->device, sdkp->zone_blocks)); logical_to_sectors(sdkp->device, zone_blocks));
sdkp->nr_zones = nr_zones = round_up(sdkp->capacity, zone_blocks) >> zone_shift;
round_up(sdkp->capacity, sdkp->zone_blocks) >> sdkp->zone_shift;
/* /*
* Initialize the device request queue information if the number * Initialize the device request queue information if the number
* of zones changed. * of zones changed.
*/ */
if (sdkp->nr_zones != q->nr_zones) { if (nr_zones != sdkp->nr_zones || nr_zones != q->nr_zones) {
unsigned long *seq_zones_wlock = NULL, *seq_zones_bitmap = NULL;
size_t zone_bitmap_size;
sd_zbc_cleanup(sdkp); if (nr_zones) {
seq_zones_wlock = sd_zbc_alloc_zone_bitmap(nr_zones,
q->nr_zones = sdkp->nr_zones; q->node);
if (sdkp->nr_zones) { if (!seq_zones_wlock) {
q->seq_zones_wlock = sd_zbc_alloc_zone_bitmap(sdkp);
if (!q->seq_zones_wlock) {
ret = -ENOMEM; ret = -ENOMEM;
goto err; goto err;
} }
ret = sd_zbc_setup_seq_zones_bitmap(sdkp); seq_zones_bitmap = sd_zbc_setup_seq_zones_bitmap(sdkp,
if (ret) { zone_shift, nr_zones);
sd_zbc_cleanup(sdkp); if (IS_ERR(seq_zones_bitmap)) {
ret = PTR_ERR(seq_zones_bitmap);
kfree(seq_zones_wlock);
goto err; goto err;
} }
} }
zone_bitmap_size = BITS_TO_LONGS(nr_zones) *
sizeof(unsigned long);
blk_mq_freeze_queue(q);
if (q->nr_zones != nr_zones) {
/* READ16/WRITE16 is mandatory for ZBC disks */
sdkp->device->use_16_for_rw = 1;
sdkp->device->use_10_for_rw = 0;
sdkp->zone_blocks = zone_blocks;
sdkp->zone_shift = zone_shift;
sdkp->nr_zones = nr_zones;
q->nr_zones = nr_zones;
swap(q->seq_zones_wlock, seq_zones_wlock);
swap(q->seq_zones_bitmap, seq_zones_bitmap);
} else if (memcmp(q->seq_zones_bitmap, seq_zones_bitmap,
zone_bitmap_size) != 0) {
memcpy(q->seq_zones_bitmap, seq_zones_bitmap,
zone_bitmap_size);
}
blk_mq_unfreeze_queue(q);
kfree(seq_zones_wlock);
kfree(seq_zones_bitmap);
} }
return 0; return 0;
@ -661,6 +680,7 @@ err:
int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf) int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
{ {
int64_t zone_blocks;
int ret; int ret;
if (!sd_is_zoned(sdkp)) if (!sd_is_zoned(sdkp))
@ -697,12 +717,16 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
* Check zone size: only devices with a constant zone size (except * Check zone size: only devices with a constant zone size (except
* an eventual last runt zone) that is a power of 2 are supported. * an eventual last runt zone) that is a power of 2 are supported.
*/ */
ret = sd_zbc_check_zone_size(sdkp); zone_blocks = sd_zbc_check_zone_size(sdkp);
if (ret) ret = -EFBIG;
if (zone_blocks != (u32)zone_blocks)
goto err;
ret = zone_blocks;
if (ret < 0)
goto err; goto err;
/* The drive satisfies the kernel restrictions: set it up */ /* The drive satisfies the kernel restrictions: set it up */
ret = sd_zbc_setup(sdkp); ret = sd_zbc_setup(sdkp, zone_blocks);
if (ret) if (ret)
goto err; goto err;

View File

@ -605,6 +605,11 @@ struct request_queue {
* initialized by the low level device driver (e.g. scsi/sd.c). * initialized by the low level device driver (e.g. scsi/sd.c).
* Stacking drivers (device mappers) may or may not initialize * Stacking drivers (device mappers) may or may not initialize
* these fields. * these fields.
*
* Reads of this information must be protected with blk_queue_enter() /
* blk_queue_exit(). Modifying this information is only allowed while
* no requests are being processed. See also blk_mq_freeze_queue() and
* blk_mq_unfreeze_queue().
*/ */
unsigned int nr_zones; unsigned int nr_zones;
unsigned long *seq_zones_bitmap; unsigned long *seq_zones_bitmap;