Merge tag 'md/4.12-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md
Pull MD fixes from Shaohua Li:
- Several bug fixes for raid5-cache from Song Liu, mainly handling journal disk errors
- Fix bad block handling when choosing a raid1 disk, from Tomasz Majchrzak
- Simplify external metadata array sysfs handling, from Artur Paszkiewicz
- Optimize raid0 discard handling, from me; raid0 now dispatches large discard IO directly to the underlying disks.

* tag 'md/4.12-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md:
  raid1: prefer disk without bad blocks
  md/r5cache: handle sync with data in write back cache
  md/r5cache: gracefully handle journal device errors for writeback mode
  md/raid1/10: avoid unnecessary locking
  md/raid5-cache: in r5l_do_submit_io(), submit io->split_bio first
  md/md0: optimize raid0 discard handling
  md: don't return -EAGAIN in md_allow_write for external metadata arrays
  md/raid5: make use of spin_lock_irq over local_irq_disable + spin_lock
This commit is contained in:
commit
8b4822de59
|
@ -8022,18 +8022,15 @@ EXPORT_SYMBOL(md_write_end);
|
|||
* may proceed without blocking. It is important to call this before
|
||||
* attempting a GFP_KERNEL allocation while holding the mddev lock.
|
||||
* Must be called with mddev_lock held.
|
||||
*
|
||||
* In the ->external case MD_SB_CHANGE_PENDING can not be cleared until mddev->lock
|
||||
* is dropped, so return -EAGAIN after notifying userspace.
|
||||
*/
|
||||
int md_allow_write(struct mddev *mddev)
|
||||
void md_allow_write(struct mddev *mddev)
|
||||
{
|
||||
if (!mddev->pers)
|
||||
return 0;
|
||||
return;
|
||||
if (mddev->ro)
|
||||
return 0;
|
||||
return;
|
||||
if (!mddev->pers->sync_request)
|
||||
return 0;
|
||||
return;
|
||||
|
||||
spin_lock(&mddev->lock);
|
||||
if (mddev->in_sync) {
|
||||
|
@ -8046,13 +8043,12 @@ int md_allow_write(struct mddev *mddev)
|
|||
spin_unlock(&mddev->lock);
|
||||
md_update_sb(mddev, 0);
|
||||
sysfs_notify_dirent_safe(mddev->sysfs_state);
|
||||
/* wait for the dirty state to be recorded in the metadata */
|
||||
wait_event(mddev->sb_wait,
|
||||
!test_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags) &&
|
||||
!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
|
||||
} else
|
||||
spin_unlock(&mddev->lock);
|
||||
|
||||
if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
|
||||
return -EAGAIN;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(md_allow_write);
|
||||
|
||||
|
|
|
@ -665,7 +665,7 @@ extern int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
|
|||
bool metadata_op);
|
||||
extern void md_do_sync(struct md_thread *thread);
|
||||
extern void md_new_event(struct mddev *mddev);
|
||||
extern int md_allow_write(struct mddev *mddev);
|
||||
extern void md_allow_write(struct mddev *mddev);
|
||||
extern void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev);
|
||||
extern void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors);
|
||||
extern int md_check_no_bitmap(struct mddev *mddev);
|
||||
|
|
|
@ -385,7 +385,7 @@ static int raid0_run(struct mddev *mddev)
|
|||
blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
|
||||
blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors);
|
||||
blk_queue_max_write_zeroes_sectors(mddev->queue, mddev->chunk_sectors);
|
||||
blk_queue_max_discard_sectors(mddev->queue, mddev->chunk_sectors);
|
||||
blk_queue_max_discard_sectors(mddev->queue, UINT_MAX);
|
||||
|
||||
blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
|
||||
blk_queue_io_opt(mddev->queue,
|
||||
|
@ -459,6 +459,95 @@ static inline int is_io_in_chunk_boundary(struct mddev *mddev,
|
|||
}
|
||||
}
|
||||
|
||||
/*
 * raid0_handle_discard() - turn one discard bio into per-device discards.
 *
 * @mddev: the array; mddev->private is used as the struct r0conf and
 *         mddev->chunk_sectors as the chunk size.
 * @bio:   the discard request to process.
 *
 * Only the zone returned by find_zone() for the bio's start sector is
 * handled in one call. If the bio runs past zone->zone_end it is split at
 * the zone end, the original bio is passed back to generic_make_request(),
 * and this call continues with the split-off part.
 *
 * The zone-relative [start, end) range is converted into a
 * [dev_start, dev_end) range for every device of the zone. For each
 * non-empty range a discard is prepared with __blkdev_issue_discard(),
 * chained to @bio and submitted. A device for which that call fails, or
 * yields no bio, is skipped without reporting an error. bio_endio() is
 * called on @bio once all devices have been visited.
 */
static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
|
||||
{
|
||||
struct r0conf *conf = mddev->private;
|
||||
struct strip_zone *zone;
|
||||
sector_t start = bio->bi_iter.bi_sector;
|
||||
sector_t end;
|
||||
unsigned int stripe_size;
|
||||
sector_t first_stripe_index, last_stripe_index;
|
||||
sector_t start_disk_offset;
|
||||
unsigned int start_disk_index;
|
||||
sector_t end_disk_offset;
|
||||
unsigned int end_disk_index;
|
||||
unsigned int disk;
|
||||
|
||||
/* start is passed by address; presumably find_zone() rewrites it to a zone-relative offset -- TODO confirm */
zone = find_zone(conf, &start);
|
||||
|
||||
/* The bio crosses the end of this zone: cut it at the zone end, resubmit the original bio and keep working on the split part. */
if (bio_end_sector(bio) > zone->zone_end) {
|
||||
struct bio *split = bio_split(bio,
|
||||
zone->zone_end - bio->bi_iter.bi_sector, GFP_NOIO,
|
||||
mddev->bio_set);
|
||||
bio_chain(split, bio);
|
||||
generic_make_request(bio);
|
||||
bio = split;
|
||||
end = zone->zone_end;
|
||||
} else
|
||||
end = bio_end_sector(bio);
|
||||
|
||||
/* Not the first zone: subtract the previous zone's end so that end becomes zone-relative. */
if (zone != conf->strip_zone)
|
||||
end = end - zone[-1].zone_end;
|
||||
|
||||
/* From here on, start and end are offsets within the zone */
|
||||
stripe_size = zone->nb_dev * mddev->chunk_sectors;
|
||||
|
||||
/* sector_div() divides its first argument in place, so each variable ends up holding a stripe index. */
first_stripe_index = start;
|
||||
sector_div(first_stripe_index, stripe_size);
|
||||
last_stripe_index = end;
|
||||
sector_div(last_stripe_index, stripe_size);
|
||||
|
||||
/*
 * Split the position inside its stripe into a device index (which chunk
 * of the stripe) and an offset on that device (whole chunks of earlier
 * stripes plus the remainder inside the chunk).
 */
start_disk_index = (int)(start - first_stripe_index * stripe_size) /
|
||||
mddev->chunk_sectors;
|
||||
start_disk_offset = ((int)(start - first_stripe_index * stripe_size) %
|
||||
mddev->chunk_sectors) +
|
||||
first_stripe_index * mddev->chunk_sectors;
|
||||
end_disk_index = (int)(end - last_stripe_index * stripe_size) /
|
||||
mddev->chunk_sectors;
|
||||
end_disk_offset = ((int)(end - last_stripe_index * stripe_size) %
|
||||
mddev->chunk_sectors) +
|
||||
last_stripe_index * mddev->chunk_sectors;
|
||||
|
||||
for (disk = 0; disk < zone->nb_dev; disk++) {
|
||||
sector_t dev_start, dev_end;
|
||||
struct bio *discard_bio = NULL;
|
||||
struct md_rdev *rdev;
|
||||
|
||||
/* Devices before the starting device begin with their chunk of the next stripe; devices after it begin with their chunk of the first stripe. */
if (disk < start_disk_index)
|
||||
dev_start = (first_stripe_index + 1) *
|
||||
mddev->chunk_sectors;
|
||||
else if (disk > start_disk_index)
|
||||
dev_start = first_stripe_index * mddev->chunk_sectors;
|
||||
else
|
||||
dev_start = start_disk_offset;
|
||||
|
||||
/* Devices before the ending device include their whole chunk of the last stripe; devices after it stop where that chunk would start. */
if (disk < end_disk_index)
|
||||
dev_end = (last_stripe_index + 1) * mddev->chunk_sectors;
|
||||
else if (disk > end_disk_index)
|
||||
dev_end = last_stripe_index * mddev->chunk_sectors;
|
||||
else
|
||||
dev_end = end_disk_offset;
|
||||
|
||||
/* Empty range: nothing to discard on this device. */
if (dev_end <= dev_start)
|
||||
continue;
|
||||
|
||||
/* devlist is indexed by zone number times the first zone's device count, plus the device index within the zone. */
rdev = conf->devlist[(zone - conf->strip_zone) *
|
||||
conf->strip_zone[0].nb_dev + disk];
|
||||
/* A non-zero return or a NULL discard_bio means there is nothing to submit for this device; it is skipped silently. */
if (__blkdev_issue_discard(rdev->bdev,
|
||||
dev_start + zone->dev_start + rdev->data_offset,
|
||||
dev_end - dev_start, GFP_NOIO, 0, &discard_bio) ||
|
||||
!discard_bio)
|
||||
continue;
|
||||
bio_chain(discard_bio, bio);
|
||||
if (mddev->gendisk)
|
||||
trace_block_bio_remap(bdev_get_queue(rdev->bdev),
|
||||
discard_bio, disk_devt(mddev->gendisk),
|
||||
bio->bi_iter.bi_sector);
|
||||
generic_make_request(discard_bio);
|
||||
}
|
||||
/* NOTE(review): presumably the bio_chain() calls above hold back the real completion of bio until the chained discards finish -- confirm against the block layer docs. */
bio_endio(bio);
|
||||
}
|
||||
|
||||
static void raid0_make_request(struct mddev *mddev, struct bio *bio)
|
||||
{
|
||||
struct strip_zone *zone;
|
||||
|
@ -473,6 +562,11 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
|
|||
return;
|
||||
}
|
||||
|
||||
if (unlikely((bio_op(bio) == REQ_OP_DISCARD))) {
|
||||
raid0_handle_discard(mddev, bio);
|
||||
return;
|
||||
}
|
||||
|
||||
bio_sector = bio->bi_iter.bi_sector;
|
||||
sector = bio_sector;
|
||||
chunk_sects = mddev->chunk_sectors;
|
||||
|
@ -498,19 +592,13 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
|
|||
bio->bi_iter.bi_sector = sector + zone->dev_start +
|
||||
tmp_dev->data_offset;
|
||||
|
||||
if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
|
||||
!blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) {
|
||||
/* Just ignore it */
|
||||
bio_endio(bio);
|
||||
} else {
|
||||
if (mddev->gendisk)
|
||||
trace_block_bio_remap(bdev_get_queue(bio->bi_bdev),
|
||||
bio, disk_devt(mddev->gendisk),
|
||||
bio_sector);
|
||||
mddev_check_writesame(mddev, bio);
|
||||
mddev_check_write_zeroes(mddev, bio);
|
||||
generic_make_request(bio);
|
||||
}
|
||||
if (mddev->gendisk)
|
||||
trace_block_bio_remap(bdev_get_queue(bio->bi_bdev),
|
||||
bio, disk_devt(mddev->gendisk),
|
||||
bio_sector);
|
||||
mddev_check_writesame(mddev, bio);
|
||||
mddev_check_write_zeroes(mddev, bio);
|
||||
generic_make_request(bio);
|
||||
}
|
||||
|
||||
static void raid0_status(struct seq_file *seq, struct mddev *mddev)
|
||||
|
|
|
@ -666,8 +666,11 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
|
|||
break;
|
||||
}
|
||||
continue;
|
||||
} else
|
||||
} else {
|
||||
if ((sectors > best_good_sectors) && (best_disk >= 0))
|
||||
best_disk = -1;
|
||||
best_good_sectors = sectors;
|
||||
}
|
||||
|
||||
if (best_disk >= 0)
|
||||
/* At least two disks to choose from so failfast is OK */
|
||||
|
@ -1529,17 +1532,16 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
|
|||
plug = container_of(cb, struct raid1_plug_cb, cb);
|
||||
else
|
||||
plug = NULL;
|
||||
spin_lock_irqsave(&conf->device_lock, flags);
|
||||
if (plug) {
|
||||
bio_list_add(&plug->pending, mbio);
|
||||
plug->pending_cnt++;
|
||||
} else {
|
||||
spin_lock_irqsave(&conf->device_lock, flags);
|
||||
bio_list_add(&conf->pending_bio_list, mbio);
|
||||
conf->pending_count++;
|
||||
}
|
||||
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||
if (!plug)
|
||||
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||
md_wakeup_thread(mddev->thread);
|
||||
}
|
||||
}
|
||||
|
||||
r1_bio_write_done(r1_bio);
|
||||
|
@ -3197,7 +3199,7 @@ static int raid1_reshape(struct mddev *mddev)
|
|||
struct r1conf *conf = mddev->private;
|
||||
int cnt, raid_disks;
|
||||
unsigned long flags;
|
||||
int d, d2, err;
|
||||
int d, d2;
|
||||
|
||||
/* Cannot change chunk_size, layout, or level */
|
||||
if (mddev->chunk_sectors != mddev->new_chunk_sectors ||
|
||||
|
@ -3209,11 +3211,8 @@ static int raid1_reshape(struct mddev *mddev)
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (!mddev_is_clustered(mddev)) {
|
||||
err = md_allow_write(mddev);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
if (!mddev_is_clustered(mddev))
|
||||
md_allow_write(mddev);
|
||||
|
||||
raid_disks = mddev->raid_disks + mddev->delta_disks;
|
||||
|
||||
|
|
|
@ -1282,17 +1282,16 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
|
|||
plug = container_of(cb, struct raid10_plug_cb, cb);
|
||||
else
|
||||
plug = NULL;
|
||||
spin_lock_irqsave(&conf->device_lock, flags);
|
||||
if (plug) {
|
||||
bio_list_add(&plug->pending, mbio);
|
||||
plug->pending_cnt++;
|
||||
} else {
|
||||
spin_lock_irqsave(&conf->device_lock, flags);
|
||||
bio_list_add(&conf->pending_bio_list, mbio);
|
||||
conf->pending_count++;
|
||||
}
|
||||
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||
if (!plug)
|
||||
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||
md_wakeup_thread(mddev->thread);
|
||||
}
|
||||
}
|
||||
|
||||
static void raid10_write_request(struct mddev *mddev, struct bio *bio,
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
#include "md.h"
|
||||
#include "raid5.h"
|
||||
#include "bitmap.h"
|
||||
#include "raid5-log.h"
|
||||
|
||||
/*
|
||||
* metadata/data stored in disk with 4k size unit (a block) regardless
|
||||
|
@ -622,20 +623,30 @@ static void r5l_do_submit_io(struct r5l_log *log, struct r5l_io_unit *io)
|
|||
__r5l_set_io_unit_state(io, IO_UNIT_IO_START);
|
||||
spin_unlock_irqrestore(&log->io_list_lock, flags);
|
||||
|
||||
/*
|
||||
* In case of journal device failures, submit_bio will get error
|
||||
* and calls endio, then active stripes will continue write
|
||||
* process. Therefore, it is not necessary to check Faulty bit
|
||||
* of journal device here.
|
||||
*
|
||||
* We can't check split_bio after current_bio is submitted. If
|
||||
* io->split_bio is null, after current_bio is submitted, current_bio
|
||||
* might already be completed and the io_unit is freed. We submit
|
||||
* split_bio first to avoid the issue.
|
||||
*/
|
||||
if (io->split_bio) {
|
||||
if (io->has_flush)
|
||||
io->split_bio->bi_opf |= REQ_PREFLUSH;
|
||||
if (io->has_fua)
|
||||
io->split_bio->bi_opf |= REQ_FUA;
|
||||
submit_bio(io->split_bio);
|
||||
}
|
||||
|
||||
if (io->has_flush)
|
||||
io->current_bio->bi_opf |= REQ_PREFLUSH;
|
||||
if (io->has_fua)
|
||||
io->current_bio->bi_opf |= REQ_FUA;
|
||||
submit_bio(io->current_bio);
|
||||
|
||||
if (!io->split_bio)
|
||||
return;
|
||||
|
||||
if (io->has_flush)
|
||||
io->split_bio->bi_opf |= REQ_PREFLUSH;
|
||||
if (io->has_fua)
|
||||
io->split_bio->bi_opf |= REQ_FUA;
|
||||
submit_bio(io->split_bio);
|
||||
}
|
||||
|
||||
/* deferred io_unit will be dispatched here */
|
||||
|
@ -670,6 +681,11 @@ static void r5c_disable_writeback_async(struct work_struct *work)
|
|||
return;
|
||||
pr_info("md/raid:%s: Disabling writeback cache for degraded array.\n",
|
||||
mdname(mddev));
|
||||
|
||||
/* wait superblock change before suspend */
|
||||
wait_event(mddev->sb_wait,
|
||||
!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
|
||||
|
||||
mddev_suspend(mddev);
|
||||
log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
|
||||
mddev_resume(mddev);
|
||||
|
@ -2621,8 +2637,11 @@ int r5c_try_caching_write(struct r5conf *conf,
|
|||
* When run in degraded mode, array is set to write-through mode.
|
||||
* This check helps drain pending write safely in the transition to
|
||||
* write-through mode.
|
||||
*
|
||||
* When a stripe is syncing, the write is also handled in write
|
||||
* through mode.
|
||||
*/
|
||||
if (s->failed) {
|
||||
if (s->failed || test_bit(STRIPE_SYNCING, &sh->state)) {
|
||||
r5c_make_stripe_write_out(sh);
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
@ -2825,6 +2844,9 @@ void r5c_finish_stripe_write_out(struct r5conf *conf,
|
|||
}
|
||||
|
||||
r5l_append_flush_payload(log, sh->sector);
|
||||
/* stripe is flushed to raid disks, we can do resync now */
|
||||
if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state))
|
||||
set_bit(STRIPE_HANDLE, &sh->state);
|
||||
}
|
||||
|
||||
int r5c_cache_data(struct r5l_log *log, struct stripe_head *sh)
|
||||
|
@ -2973,7 +2995,7 @@ ioerr:
|
|||
return ret;
|
||||
}
|
||||
|
||||
void r5c_update_on_rdev_error(struct mddev *mddev)
|
||||
void r5c_update_on_rdev_error(struct mddev *mddev, struct md_rdev *rdev)
|
||||
{
|
||||
struct r5conf *conf = mddev->private;
|
||||
struct r5l_log *log = conf->log;
|
||||
|
@ -2981,7 +3003,8 @@ void r5c_update_on_rdev_error(struct mddev *mddev)
|
|||
if (!log)
|
||||
return;
|
||||
|
||||
if (raid5_calc_degraded(conf) > 0 &&
|
||||
if ((raid5_calc_degraded(conf) > 0 ||
|
||||
test_bit(Journal, &rdev->flags)) &&
|
||||
conf->log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_BACK)
|
||||
schedule_work(&log->disable_writeback_work);
|
||||
}
|
||||
|
|
|
@ -28,7 +28,8 @@ extern void r5c_flush_cache(struct r5conf *conf, int num);
|
|||
extern void r5c_check_stripe_cache_usage(struct r5conf *conf);
|
||||
extern void r5c_check_cached_full_stripe(struct r5conf *conf);
|
||||
extern struct md_sysfs_entry r5c_journal_mode;
|
||||
extern void r5c_update_on_rdev_error(struct mddev *mddev);
|
||||
extern void r5c_update_on_rdev_error(struct mddev *mddev,
|
||||
struct md_rdev *rdev);
|
||||
extern bool r5c_big_stripe_cached(struct r5conf *conf, sector_t sect);
|
||||
|
||||
extern struct dma_async_tx_descriptor *
|
||||
|
|
|
@ -103,8 +103,7 @@ static inline void unlock_device_hash_lock(struct r5conf *conf, int hash)
|
|||
static inline void lock_all_device_hash_locks_irq(struct r5conf *conf)
|
||||
{
|
||||
int i;
|
||||
local_irq_disable();
|
||||
spin_lock(conf->hash_locks);
|
||||
spin_lock_irq(conf->hash_locks);
|
||||
for (i = 1; i < NR_STRIPE_HASH_LOCKS; i++)
|
||||
spin_lock_nest_lock(conf->hash_locks + i, conf->hash_locks);
|
||||
spin_lock(&conf->device_lock);
|
||||
|
@ -114,9 +113,9 @@ static inline void unlock_all_device_hash_locks_irq(struct r5conf *conf)
|
|||
{
|
||||
int i;
|
||||
spin_unlock(&conf->device_lock);
|
||||
for (i = NR_STRIPE_HASH_LOCKS; i; i--)
|
||||
spin_unlock(conf->hash_locks + i - 1);
|
||||
local_irq_enable();
|
||||
for (i = NR_STRIPE_HASH_LOCKS - 1; i; i--)
|
||||
spin_unlock(conf->hash_locks + i);
|
||||
spin_unlock_irq(conf->hash_locks);
|
||||
}
|
||||
|
||||
/* Find first data disk in a raid6 stripe */
|
||||
|
@ -234,11 +233,15 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
|
|||
if (test_bit(R5_InJournal, &sh->dev[i].flags))
|
||||
injournal++;
|
||||
/*
|
||||
* When quiesce in r5c write back, set STRIPE_HANDLE for stripes with
|
||||
* data in journal, so they are not released to cached lists
|
||||
* In the following cases, the stripe cannot be released to cached
|
||||
* lists. Therefore, we make the stripe write out and set
|
||||
* STRIPE_HANDLE:
|
||||
* 1. when quiesce in r5c write back;
|
||||
* 2. when resync is requested for the stripe.
|
||||
*/
|
||||
if (conf->quiesce && r5c_is_writeback(conf->log) &&
|
||||
!test_bit(STRIPE_HANDLE, &sh->state) && injournal != 0) {
|
||||
if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) ||
|
||||
(conf->quiesce && r5c_is_writeback(conf->log) &&
|
||||
!test_bit(STRIPE_HANDLE, &sh->state) && injournal != 0)) {
|
||||
if (test_bit(STRIPE_R5C_CACHING, &sh->state))
|
||||
r5c_make_stripe_write_out(sh);
|
||||
set_bit(STRIPE_HANDLE, &sh->state);
|
||||
|
@ -714,12 +717,11 @@ static bool is_full_stripe_write(struct stripe_head *sh)
|
|||
|
||||
static void lock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2)
|
||||
{
|
||||
local_irq_disable();
|
||||
if (sh1 > sh2) {
|
||||
spin_lock(&sh2->stripe_lock);
|
||||
spin_lock_irq(&sh2->stripe_lock);
|
||||
spin_lock_nested(&sh1->stripe_lock, 1);
|
||||
} else {
|
||||
spin_lock(&sh1->stripe_lock);
|
||||
spin_lock_irq(&sh1->stripe_lock);
|
||||
spin_lock_nested(&sh2->stripe_lock, 1);
|
||||
}
|
||||
}
|
||||
|
@ -727,8 +729,7 @@ static void lock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2)
|
|||
static void unlock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2)
|
||||
{
|
||||
spin_unlock(&sh1->stripe_lock);
|
||||
spin_unlock(&sh2->stripe_lock);
|
||||
local_irq_enable();
|
||||
spin_unlock_irq(&sh2->stripe_lock);
|
||||
}
|
||||
|
||||
/* Only freshly new full stripe normal write stripe can be added to a batch list */
|
||||
|
@ -2312,14 +2313,12 @@ static int resize_stripes(struct r5conf *conf, int newsize)
|
|||
struct stripe_head *osh, *nsh;
|
||||
LIST_HEAD(newstripes);
|
||||
struct disk_info *ndisks;
|
||||
int err;
|
||||
int err = 0;
|
||||
struct kmem_cache *sc;
|
||||
int i;
|
||||
int hash, cnt;
|
||||
|
||||
err = md_allow_write(conf->mddev);
|
||||
if (err)
|
||||
return err;
|
||||
md_allow_write(conf->mddev);
|
||||
|
||||
/* Step 1 */
|
||||
sc = kmem_cache_create(conf->cache_name[1-conf->active_name],
|
||||
|
@ -2694,7 +2693,7 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
|
|||
bdevname(rdev->bdev, b),
|
||||
mdname(mddev),
|
||||
conf->raid_disks - mddev->degraded);
|
||||
r5c_update_on_rdev_error(mddev);
|
||||
r5c_update_on_rdev_error(mddev, rdev);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -3055,6 +3054,11 @@ sector_t raid5_compute_blocknr(struct stripe_head *sh, int i, int previous)
|
|||
* When LOG_CRITICAL, stripes with injournal == 0 will be sent to
|
||||
* no_space_stripes list.
|
||||
*
|
||||
* 3. during journal failure
|
||||
* In journal failure, we try to flush all cached data to raid disks
|
||||
* based on data in stripe cache. The array is read-only to upper
|
||||
* layers, so we would skip all pending writes.
|
||||
*
|
||||
*/
|
||||
static inline bool delay_towrite(struct r5conf *conf,
|
||||
struct r5dev *dev,
|
||||
|
@ -3068,6 +3072,9 @@ static inline bool delay_towrite(struct r5conf *conf,
|
|||
if (test_bit(R5C_LOG_CRITICAL, &conf->cache_state) &&
|
||||
s->injournal > 0)
|
||||
return true;
|
||||
/* case 3 above */
|
||||
if (s->log_failed && s->injournal)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -4653,8 +4660,13 @@ static void handle_stripe(struct stripe_head *sh)
|
|||
|
||||
if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) && !sh->batch_head) {
|
||||
spin_lock(&sh->stripe_lock);
|
||||
/* Cannot process 'sync' concurrently with 'discard' */
|
||||
if (!test_bit(STRIPE_DISCARD, &sh->state) &&
|
||||
/*
|
||||
* Cannot process 'sync' concurrently with 'discard'.
|
||||
* Flush data in r5cache before 'sync'.
|
||||
*/
|
||||
if (!test_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state) &&
|
||||
!test_bit(STRIPE_R5C_FULL_STRIPE, &sh->state) &&
|
||||
!test_bit(STRIPE_DISCARD, &sh->state) &&
|
||||
test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
|
||||
set_bit(STRIPE_SYNCING, &sh->state);
|
||||
clear_bit(STRIPE_INSYNC, &sh->state);
|
||||
|
@ -4701,10 +4713,15 @@ static void handle_stripe(struct stripe_head *sh)
|
|||
" to_write=%d failed=%d failed_num=%d,%d\n",
|
||||
s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
|
||||
s.failed_num[0], s.failed_num[1]);
|
||||
/* check if the array has lost more than max_degraded devices and,
|
||||
/*
|
||||
* check if the array has lost more than max_degraded devices and,
|
||||
* if so, some requests might need to be failed.
|
||||
*
|
||||
* When journal device failed (log_failed), we will only process
|
||||
* the stripe if there is data that needs to be written to raid disks
|
||||
*/
|
||||
if (s.failed > conf->max_degraded || s.log_failed) {
|
||||
if (s.failed > conf->max_degraded ||
|
||||
(s.log_failed && s.injournal == 0)) {
|
||||
sh->check_state = 0;
|
||||
sh->reconstruct_state = 0;
|
||||
break_stripe_batch_list(sh, 0);
|
||||
|
@ -5277,8 +5294,10 @@ static struct stripe_head *__get_priority_stripe(struct r5conf *conf, int group)
|
|||
struct stripe_head *sh, *tmp;
|
||||
struct list_head *handle_list = NULL;
|
||||
struct r5worker_group *wg;
|
||||
bool second_try = !r5c_is_writeback(conf->log);
|
||||
bool try_loprio = test_bit(R5C_LOG_TIGHT, &conf->cache_state);
|
||||
bool second_try = !r5c_is_writeback(conf->log) &&
|
||||
!r5l_log_disk_error(conf);
|
||||
bool try_loprio = test_bit(R5C_LOG_TIGHT, &conf->cache_state) ||
|
||||
r5l_log_disk_error(conf);
|
||||
|
||||
again:
|
||||
wg = NULL;
|
||||
|
@ -6313,7 +6332,6 @@ int
|
|||
raid5_set_cache_size(struct mddev *mddev, int size)
|
||||
{
|
||||
struct r5conf *conf = mddev->private;
|
||||
int err;
|
||||
|
||||
if (size <= 16 || size > 32768)
|
||||
return -EINVAL;
|
||||
|
@ -6325,10 +6343,7 @@ raid5_set_cache_size(struct mddev *mddev, int size)
|
|||
;
|
||||
mutex_unlock(&conf->cache_size_mutex);
|
||||
|
||||
|
||||
err = md_allow_write(mddev);
|
||||
if (err)
|
||||
return err;
|
||||
md_allow_write(mddev);
|
||||
|
||||
mutex_lock(&conf->cache_size_mutex);
|
||||
while (size > conf->max_nr_stripes)
|
||||
|
@ -7530,7 +7545,9 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
|
|||
* neilb: there is no locking about new writes here,
|
||||
* so this cannot be safe.
|
||||
*/
|
||||
if (atomic_read(&conf->active_stripes)) {
|
||||
if (atomic_read(&conf->active_stripes) ||
|
||||
atomic_read(&conf->r5c_cached_full_stripes) ||
|
||||
atomic_read(&conf->r5c_cached_partial_stripes)) {
|
||||
return -EBUSY;
|
||||
}
|
||||
log_exit(conf);
|
||||
|
|
Loading…
Reference in New Issue