- DM verity fix for crash due to using vmalloc'd buffers with the
asynchronous crypto hash API. - Fix to both DM crypt and DM integrity targets to discontinue using CRYPTO_TFM_REQ_MAY_SLEEP because its use of GFP_KERNEL can lead to deadlock by recursing back into a filesystem. - Various DM raid fixes related to reshape and rebuild races. - Fix for DM thin-provisioning to avoid data corruption that was a side-effect of needing to abort DM thin metadata transaction due to running out of metadata space. Fix is to reserve a small amount of metadata space so that once it is used the DM thin-pool can finish its active transaction before switching to read-only mode. -----BEGIN PGP SIGNATURE----- Version: GnuPG v1 iQEcBAABAgAGBQJbmq1sAAoJEMUj8QotnQNa9hwIANBzgefmIoBA8hCqlS7Nzogh moHAbaUONYxAVksJ5s4Divpa4RE8jSZ8AiVJizbLj94XxqnpA5x4pmhtUs3OdWxq 4FW8fZvb2L1IMeshajMsO+rKrL6y8uwYnRI5zSJRKxhV2frj1VZ4ioMEXMGrwqjO jwNYFWglX5G3eIohUIDxgXki/WkacndQlsy2T6Utx0idz50usOt1pNON/GPhRsXd yWbvxQWuaZlm6+TF1ygsrXr1nMwRwDw0pdjY9sRQ3hHmuYQ24Wgkwf5Ggi5US7VZ iwgK8dppR8zWqwRyxRYahJvQvwcnC18mewFAmuDQfvPXqvUHtuhWaxzE4D9c3L0= =yIoD -----END PGP SIGNATURE----- Merge tag 'for-4.19/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm Pull device mapper fixes from Mike Snitzer: - DM verity fix for crash due to using vmalloc'd buffers with the asynchronous crypto hash API. - Fix to both DM crypt and DM integrity targets to discontinue using CRYPTO_TFM_REQ_MAY_SLEEP because its use of GFP_KERNEL can lead to deadlock by recursing back into a filesystem. - Various DM raid fixes related to reshape and rebuild races. - Fix for DM thin-provisioning to avoid data corruption that was a side-effect of needing to abort DM thin metadata transaction due to running out of metadata space. Fix is to reserve a small amount of metadata space so that once it is used the DM thin-pool can finish its active transaction before switching to read-only mode. 
* tag 'for-4.19/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: dm thin metadata: try to avoid ever aborting transactions dm raid: bump target version, update comments and documentation dm raid: fix RAID leg rebuild errors dm raid: fix rebuild of specific devices by updating superblock dm raid: fix stripe adding reshape deadlock dm raid: fix reshape race on small devices dm: disable CRYPTO_TFM_REQ_MAY_SLEEP to fix a GFP_KERNEL recursion deadlock dm verity: fix crash on bufio buffer that was allocated with vmalloc
This commit is contained in:
commit
a0efc03b79
|
@ -348,3 +348,7 @@ Version History
|
|||
1.13.1 Fix deadlock caused by early md_stop_writes(). Also fix size and
|
||||
state races.
|
||||
1.13.2 Fix raid redundancy validation and avoid keeping raid set frozen
|
||||
1.14.0 Fix reshape race on small devices. Fix stripe adding reshape
|
||||
deadlock/potential data corruption. Update superblock when
|
||||
specific devices are requested via rebuild. Fix RAID leg
|
||||
rebuild errors.
|
||||
|
|
|
@ -332,7 +332,7 @@ static int crypt_iv_essiv_init(struct crypt_config *cc)
|
|||
int err;
|
||||
|
||||
desc->tfm = essiv->hash_tfm;
|
||||
desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
|
||||
desc->flags = 0;
|
||||
|
||||
err = crypto_shash_digest(desc, cc->key, cc->key_size, essiv->salt);
|
||||
shash_desc_zero(desc);
|
||||
|
@ -606,7 +606,7 @@ static int crypt_iv_lmk_one(struct crypt_config *cc, u8 *iv,
|
|||
int i, r;
|
||||
|
||||
desc->tfm = lmk->hash_tfm;
|
||||
desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
|
||||
desc->flags = 0;
|
||||
|
||||
r = crypto_shash_init(desc);
|
||||
if (r)
|
||||
|
@ -768,7 +768,7 @@ static int crypt_iv_tcw_whitening(struct crypt_config *cc,
|
|||
|
||||
/* calculate crc32 for every 32bit part and xor it */
|
||||
desc->tfm = tcw->crc32_tfm;
|
||||
desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
|
||||
desc->flags = 0;
|
||||
for (i = 0; i < 4; i++) {
|
||||
r = crypto_shash_init(desc);
|
||||
if (r)
|
||||
|
@ -1251,7 +1251,7 @@ static void crypt_alloc_req_skcipher(struct crypt_config *cc,
|
|||
* requests if driver request queue is full.
|
||||
*/
|
||||
skcipher_request_set_callback(ctx->r.req,
|
||||
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
|
||||
CRYPTO_TFM_REQ_MAY_BACKLOG,
|
||||
kcryptd_async_done, dmreq_of_req(cc, ctx->r.req));
|
||||
}
|
||||
|
||||
|
@ -1268,7 +1268,7 @@ static void crypt_alloc_req_aead(struct crypt_config *cc,
|
|||
* requests if driver request queue is full.
|
||||
*/
|
||||
aead_request_set_callback(ctx->r.req_aead,
|
||||
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
|
||||
CRYPTO_TFM_REQ_MAY_BACKLOG,
|
||||
kcryptd_async_done, dmreq_of_req(cc, ctx->r.req_aead));
|
||||
}
|
||||
|
||||
|
|
|
@ -532,7 +532,7 @@ static void section_mac(struct dm_integrity_c *ic, unsigned section, __u8 result
|
|||
unsigned j, size;
|
||||
|
||||
desc->tfm = ic->journal_mac;
|
||||
desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
|
||||
desc->flags = 0;
|
||||
|
||||
r = crypto_shash_init(desc);
|
||||
if (unlikely(r)) {
|
||||
|
@ -676,7 +676,7 @@ static void complete_journal_encrypt(struct crypto_async_request *req, int err)
|
|||
static bool do_crypt(bool encrypt, struct skcipher_request *req, struct journal_completion *comp)
|
||||
{
|
||||
int r;
|
||||
skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
|
||||
skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
|
||||
complete_journal_encrypt, comp);
|
||||
if (likely(encrypt))
|
||||
r = crypto_skcipher_encrypt(req);
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
* Copyright (C) 2010-2011 Neil Brown
|
||||
* Copyright (C) 2010-2017 Red Hat, Inc. All rights reserved.
|
||||
* Copyright (C) 2010-2018 Red Hat, Inc. All rights reserved.
|
||||
*
|
||||
* This file is released under the GPL.
|
||||
*/
|
||||
|
@ -29,9 +29,6 @@
|
|||
*/
|
||||
#define MIN_RAID456_JOURNAL_SPACE (4*2048)
|
||||
|
||||
/* Global list of all raid sets */
|
||||
static LIST_HEAD(raid_sets);
|
||||
|
||||
static bool devices_handle_discard_safely = false;
|
||||
|
||||
/*
|
||||
|
@ -227,7 +224,6 @@ struct rs_layout {
|
|||
|
||||
struct raid_set {
|
||||
struct dm_target *ti;
|
||||
struct list_head list;
|
||||
|
||||
uint32_t stripe_cache_entries;
|
||||
unsigned long ctr_flags;
|
||||
|
@ -273,19 +269,6 @@ static void rs_config_restore(struct raid_set *rs, struct rs_layout *l)
|
|||
mddev->new_chunk_sectors = l->new_chunk_sectors;
|
||||
}
|
||||
|
||||
/* Find any raid_set in active slot for @rs on global list */
|
||||
static struct raid_set *rs_find_active(struct raid_set *rs)
|
||||
{
|
||||
struct raid_set *r;
|
||||
struct mapped_device *md = dm_table_get_md(rs->ti->table);
|
||||
|
||||
list_for_each_entry(r, &raid_sets, list)
|
||||
if (r != rs && dm_table_get_md(r->ti->table) == md)
|
||||
return r;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* raid10 algorithms (i.e. formats) */
|
||||
#define ALGORITHM_RAID10_DEFAULT 0
|
||||
#define ALGORITHM_RAID10_NEAR 1
|
||||
|
@ -764,7 +747,6 @@ static struct raid_set *raid_set_alloc(struct dm_target *ti, struct raid_type *r
|
|||
|
||||
mddev_init(&rs->md);
|
||||
|
||||
INIT_LIST_HEAD(&rs->list);
|
||||
rs->raid_disks = raid_devs;
|
||||
rs->delta_disks = 0;
|
||||
|
||||
|
@ -782,9 +764,6 @@ static struct raid_set *raid_set_alloc(struct dm_target *ti, struct raid_type *r
|
|||
for (i = 0; i < raid_devs; i++)
|
||||
md_rdev_init(&rs->dev[i].rdev);
|
||||
|
||||
/* Add @rs to global list. */
|
||||
list_add(&rs->list, &raid_sets);
|
||||
|
||||
/*
|
||||
* Remaining items to be initialized by further RAID params:
|
||||
* rs->md.persistent
|
||||
|
@ -797,7 +776,7 @@ static struct raid_set *raid_set_alloc(struct dm_target *ti, struct raid_type *r
|
|||
return rs;
|
||||
}
|
||||
|
||||
/* Free all @rs allocations and remove it from global list. */
|
||||
/* Free all @rs allocations */
|
||||
static void raid_set_free(struct raid_set *rs)
|
||||
{
|
||||
int i;
|
||||
|
@ -815,8 +794,6 @@ static void raid_set_free(struct raid_set *rs)
|
|||
dm_put_device(rs->ti, rs->dev[i].data_dev);
|
||||
}
|
||||
|
||||
list_del(&rs->list);
|
||||
|
||||
kfree(rs);
|
||||
}
|
||||
|
||||
|
@ -2649,7 +2626,7 @@ static int rs_adjust_data_offsets(struct raid_set *rs)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* HM FIXME: get InSync raid_dev? */
|
||||
/* HM FIXME: get In_Sync raid_dev? */
|
||||
rdev = &rs->dev[0].rdev;
|
||||
|
||||
if (rs->delta_disks < 0) {
|
||||
|
@ -3149,6 +3126,11 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
|
|||
set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
|
||||
rs_set_new(rs);
|
||||
} else if (rs_is_recovering(rs)) {
|
||||
/* Rebuild particular devices */
|
||||
if (test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags)) {
|
||||
set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
|
||||
rs_setup_recovery(rs, MaxSector);
|
||||
}
|
||||
/* A recovering raid set may be resized */
|
||||
; /* skip setup rs */
|
||||
} else if (rs_is_reshaping(rs)) {
|
||||
|
@ -3242,6 +3224,8 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
|
|||
/* Start raid set read-only and assumed clean to change in raid_resume() */
|
||||
rs->md.ro = 1;
|
||||
rs->md.in_sync = 1;
|
||||
|
||||
/* Keep array frozen */
|
||||
set_bit(MD_RECOVERY_FROZEN, &rs->md.recovery);
|
||||
|
||||
/* Has to be held on running the array */
|
||||
|
@ -3265,7 +3249,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
|
|||
rs->callbacks.congested_fn = raid_is_congested;
|
||||
dm_table_add_target_callbacks(ti->table, &rs->callbacks);
|
||||
|
||||
/* If raid4/5/6 journal mode explictely requested (only possible with journal dev) -> set it */
|
||||
/* If raid4/5/6 journal mode explicitly requested (only possible with journal dev) -> set it */
|
||||
if (test_bit(__CTR_FLAG_JOURNAL_MODE, &rs->ctr_flags)) {
|
||||
r = r5c_journal_mode_set(&rs->md, rs->journal_dev.mode);
|
||||
if (r) {
|
||||
|
@ -3350,32 +3334,53 @@ static int raid_map(struct dm_target *ti, struct bio *bio)
|
|||
return DM_MAPIO_SUBMITTED;
|
||||
}
|
||||
|
||||
/* Return string describing the current sync action of @mddev */
|
||||
static const char *decipher_sync_action(struct mddev *mddev, unsigned long recovery)
|
||||
/* Return sync state string for @state */
|
||||
enum sync_state { st_frozen, st_reshape, st_resync, st_check, st_repair, st_recover, st_idle };
|
||||
static const char *sync_str(enum sync_state state)
|
||||
{
|
||||
/* Has to be in above sync_state order! */
|
||||
static const char *sync_strs[] = {
|
||||
"frozen",
|
||||
"reshape",
|
||||
"resync",
|
||||
"check",
|
||||
"repair",
|
||||
"recover",
|
||||
"idle"
|
||||
};
|
||||
|
||||
return __within_range(state, 0, ARRAY_SIZE(sync_strs) - 1) ? sync_strs[state] : "undef";
|
||||
};
|
||||
|
||||
/* Return enum sync_state for @mddev derived from @recovery flags */
|
||||
static const enum sync_state decipher_sync_action(struct mddev *mddev, unsigned long recovery)
|
||||
{
|
||||
if (test_bit(MD_RECOVERY_FROZEN, &recovery))
|
||||
return "frozen";
|
||||
return st_frozen;
|
||||
|
||||
/* The MD sync thread can be done with io but still be running */
|
||||
/* The MD sync thread can be done with io or be interrupted but still be running */
|
||||
if (!test_bit(MD_RECOVERY_DONE, &recovery) &&
|
||||
(test_bit(MD_RECOVERY_RUNNING, &recovery) ||
|
||||
(!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery)))) {
|
||||
if (test_bit(MD_RECOVERY_RESHAPE, &recovery))
|
||||
return "reshape";
|
||||
return st_reshape;
|
||||
|
||||
if (test_bit(MD_RECOVERY_SYNC, &recovery)) {
|
||||
if (!test_bit(MD_RECOVERY_REQUESTED, &recovery))
|
||||
return "resync";
|
||||
else if (test_bit(MD_RECOVERY_CHECK, &recovery))
|
||||
return "check";
|
||||
return "repair";
|
||||
return st_resync;
|
||||
if (test_bit(MD_RECOVERY_CHECK, &recovery))
|
||||
return st_check;
|
||||
return st_repair;
|
||||
}
|
||||
|
||||
if (test_bit(MD_RECOVERY_RECOVER, &recovery))
|
||||
return "recover";
|
||||
return st_recover;
|
||||
|
||||
if (mddev->reshape_position != MaxSector)
|
||||
return st_reshape;
|
||||
}
|
||||
|
||||
return "idle";
|
||||
return st_idle;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -3409,6 +3414,7 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
|
|||
sector_t resync_max_sectors)
|
||||
{
|
||||
sector_t r;
|
||||
enum sync_state state;
|
||||
struct mddev *mddev = &rs->md;
|
||||
|
||||
clear_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
|
||||
|
@ -3419,20 +3425,14 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
|
|||
set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
|
||||
|
||||
} else {
|
||||
if (!test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags) &&
|
||||
!test_bit(MD_RECOVERY_INTR, &recovery) &&
|
||||
(test_bit(MD_RECOVERY_NEEDED, &recovery) ||
|
||||
test_bit(MD_RECOVERY_RESHAPE, &recovery) ||
|
||||
test_bit(MD_RECOVERY_RUNNING, &recovery)))
|
||||
r = mddev->curr_resync_completed;
|
||||
else
|
||||
r = mddev->recovery_cp;
|
||||
state = decipher_sync_action(mddev, recovery);
|
||||
|
||||
if (r >= resync_max_sectors &&
|
||||
(!test_bit(MD_RECOVERY_REQUESTED, &recovery) ||
|
||||
(!test_bit(MD_RECOVERY_FROZEN, &recovery) &&
|
||||
!test_bit(MD_RECOVERY_NEEDED, &recovery) &&
|
||||
!test_bit(MD_RECOVERY_RUNNING, &recovery)))) {
|
||||
if (state == st_idle && !test_bit(MD_RECOVERY_INTR, &recovery))
|
||||
r = mddev->recovery_cp;
|
||||
else
|
||||
r = mddev->curr_resync_completed;
|
||||
|
||||
if (state == st_idle && r >= resync_max_sectors) {
|
||||
/*
|
||||
* Sync complete.
|
||||
*/
|
||||
|
@ -3440,24 +3440,20 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
|
|||
if (test_bit(MD_RECOVERY_RECOVER, &recovery))
|
||||
set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
|
||||
|
||||
} else if (test_bit(MD_RECOVERY_RECOVER, &recovery)) {
|
||||
} else if (state == st_recover)
|
||||
/*
|
||||
* In case we are recovering, the array is not in sync
|
||||
* and health chars should show the recovering legs.
|
||||
*/
|
||||
;
|
||||
|
||||
} else if (test_bit(MD_RECOVERY_SYNC, &recovery) &&
|
||||
!test_bit(MD_RECOVERY_REQUESTED, &recovery)) {
|
||||
else if (state == st_resync)
|
||||
/*
|
||||
* If "resync" is occurring, the raid set
|
||||
* is or may be out of sync hence the health
|
||||
* characters shall be 'a'.
|
||||
*/
|
||||
set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags);
|
||||
|
||||
} else if (test_bit(MD_RECOVERY_RESHAPE, &recovery) &&
|
||||
!test_bit(MD_RECOVERY_REQUESTED, &recovery)) {
|
||||
else if (state == st_reshape)
|
||||
/*
|
||||
* If "reshape" is occurring, the raid set
|
||||
* is or may be out of sync hence the health
|
||||
|
@ -3465,7 +3461,7 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
|
|||
*/
|
||||
set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags);
|
||||
|
||||
} else if (test_bit(MD_RECOVERY_REQUESTED, &recovery)) {
|
||||
else if (state == st_check || state == st_repair)
|
||||
/*
|
||||
* If "check" or "repair" is occurring, the raid set has
|
||||
* undergone an initial sync and the health characters
|
||||
|
@ -3473,12 +3469,12 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
|
|||
*/
|
||||
set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
|
||||
|
||||
} else {
|
||||
else {
|
||||
struct md_rdev *rdev;
|
||||
|
||||
/*
|
||||
* We are idle and recovery is needed, prevent 'A' chars race
|
||||
* caused by components still set to in-sync by constrcuctor.
|
||||
* caused by components still set to in-sync by constructor.
|
||||
*/
|
||||
if (test_bit(MD_RECOVERY_NEEDED, &recovery))
|
||||
set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags);
|
||||
|
@ -3542,7 +3538,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
|
|||
progress = rs_get_progress(rs, recovery, resync_max_sectors);
|
||||
resync_mismatches = (mddev->last_sync_action && !strcasecmp(mddev->last_sync_action, "check")) ?
|
||||
atomic64_read(&mddev->resync_mismatches) : 0;
|
||||
sync_action = decipher_sync_action(&rs->md, recovery);
|
||||
sync_action = sync_str(decipher_sync_action(&rs->md, recovery));
|
||||
|
||||
/* HM FIXME: do we want another state char for raid0? It shows 'D'/'A'/'-' now */
|
||||
for (i = 0; i < rs->raid_disks; i++)
|
||||
|
@ -3892,14 +3888,13 @@ static int rs_start_reshape(struct raid_set *rs)
|
|||
struct mddev *mddev = &rs->md;
|
||||
struct md_personality *pers = mddev->pers;
|
||||
|
||||
/* Don't allow the sync thread to work until the table gets reloaded. */
|
||||
set_bit(MD_RECOVERY_WAIT, &mddev->recovery);
|
||||
|
||||
r = rs_setup_reshape(rs);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
/* Need to be resumed to be able to start reshape, recovery is frozen until raid_resume() though */
|
||||
if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags))
|
||||
mddev_resume(mddev);
|
||||
|
||||
/*
|
||||
* Check any reshape constraints enforced by the personalility
|
||||
*
|
||||
|
@ -3923,10 +3918,6 @@ static int rs_start_reshape(struct raid_set *rs)
|
|||
}
|
||||
}
|
||||
|
||||
/* Suspend because a resume will happen in raid_resume() */
|
||||
set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags);
|
||||
mddev_suspend(mddev);
|
||||
|
||||
/*
|
||||
* Now reshape got set up, update superblocks to
|
||||
* reflect the fact so that a table reload will
|
||||
|
@ -3947,29 +3938,6 @@ static int raid_preresume(struct dm_target *ti)
|
|||
if (test_and_set_bit(RT_FLAG_RS_PRERESUMED, &rs->runtime_flags))
|
||||
return 0;
|
||||
|
||||
if (!test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags)) {
|
||||
struct raid_set *rs_active = rs_find_active(rs);
|
||||
|
||||
if (rs_active) {
|
||||
/*
|
||||
* In case no rebuilds have been requested
|
||||
* and an active table slot exists, copy
|
||||
* current resynchonization completed and
|
||||
* reshape position pointers across from
|
||||
* suspended raid set in the active slot.
|
||||
*
|
||||
* This resumes the new mapping at current
|
||||
* offsets to continue recover/reshape without
|
||||
* necessarily redoing a raid set partially or
|
||||
* causing data corruption in case of a reshape.
|
||||
*/
|
||||
if (rs_active->md.curr_resync_completed != MaxSector)
|
||||
mddev->curr_resync_completed = rs_active->md.curr_resync_completed;
|
||||
if (rs_active->md.reshape_position != MaxSector)
|
||||
mddev->reshape_position = rs_active->md.reshape_position;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The superblocks need to be updated on disk if the
|
||||
* array is new or new devices got added (thus zeroed
|
||||
|
@ -4046,7 +4014,7 @@ static void raid_resume(struct dm_target *ti)
|
|||
|
||||
static struct target_type raid_target = {
|
||||
.name = "raid",
|
||||
.version = {1, 13, 2},
|
||||
.version = {1, 14, 0},
|
||||
.module = THIS_MODULE,
|
||||
.ctr = raid_ctr,
|
||||
.dtr = raid_dtr,
|
||||
|
|
|
@ -188,6 +188,12 @@ struct dm_pool_metadata {
|
|||
unsigned long flags;
|
||||
sector_t data_block_size;
|
||||
|
||||
/*
|
||||
* We reserve a section of the metadata for commit overhead.
|
||||
* All reported space does *not* include this.
|
||||
*/
|
||||
dm_block_t metadata_reserve;
|
||||
|
||||
/*
|
||||
* Set if a transaction has to be aborted but the attempt to roll back
|
||||
* to the previous (good) transaction failed. The only pool metadata
|
||||
|
@ -816,6 +822,22 @@ static int __commit_transaction(struct dm_pool_metadata *pmd)
|
|||
return dm_tm_commit(pmd->tm, sblock);
|
||||
}
|
||||
|
||||
static void __set_metadata_reserve(struct dm_pool_metadata *pmd)
|
||||
{
|
||||
int r;
|
||||
dm_block_t total;
|
||||
dm_block_t max_blocks = 4096; /* 16M */
|
||||
|
||||
r = dm_sm_get_nr_blocks(pmd->metadata_sm, &total);
|
||||
if (r) {
|
||||
DMERR("could not get size of metadata device");
|
||||
pmd->metadata_reserve = max_blocks;
|
||||
} else {
|
||||
sector_div(total, 10);
|
||||
pmd->metadata_reserve = min(max_blocks, total);
|
||||
}
|
||||
}
|
||||
|
||||
struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev,
|
||||
sector_t data_block_size,
|
||||
bool format_device)
|
||||
|
@ -849,6 +871,8 @@ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev,
|
|||
return ERR_PTR(r);
|
||||
}
|
||||
|
||||
__set_metadata_reserve(pmd);
|
||||
|
||||
return pmd;
|
||||
}
|
||||
|
||||
|
@ -1820,6 +1844,13 @@ int dm_pool_get_free_metadata_block_count(struct dm_pool_metadata *pmd,
|
|||
down_read(&pmd->root_lock);
|
||||
if (!pmd->fail_io)
|
||||
r = dm_sm_get_nr_free(pmd->metadata_sm, result);
|
||||
|
||||
if (!r) {
|
||||
if (*result < pmd->metadata_reserve)
|
||||
*result = 0;
|
||||
else
|
||||
*result -= pmd->metadata_reserve;
|
||||
}
|
||||
up_read(&pmd->root_lock);
|
||||
|
||||
return r;
|
||||
|
@ -1932,8 +1963,11 @@ int dm_pool_resize_metadata_dev(struct dm_pool_metadata *pmd, dm_block_t new_cou
|
|||
int r = -EINVAL;
|
||||
|
||||
down_write(&pmd->root_lock);
|
||||
if (!pmd->fail_io)
|
||||
if (!pmd->fail_io) {
|
||||
r = __resize_space_map(pmd->metadata_sm, new_count);
|
||||
if (!r)
|
||||
__set_metadata_reserve(pmd);
|
||||
}
|
||||
up_write(&pmd->root_lock);
|
||||
|
||||
return r;
|
||||
|
|
|
@ -200,7 +200,13 @@ struct dm_thin_new_mapping;
|
|||
enum pool_mode {
|
||||
PM_WRITE, /* metadata may be changed */
|
||||
PM_OUT_OF_DATA_SPACE, /* metadata may be changed, though data may not be allocated */
|
||||
|
||||
/*
|
||||
* Like READ_ONLY, except may switch back to WRITE on metadata resize. Reported as READ_ONLY.
|
||||
*/
|
||||
PM_OUT_OF_METADATA_SPACE,
|
||||
PM_READ_ONLY, /* metadata may not be changed */
|
||||
|
||||
PM_FAIL, /* all I/O fails */
|
||||
};
|
||||
|
||||
|
@ -1371,7 +1377,35 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode);
|
|||
|
||||
static void requeue_bios(struct pool *pool);
|
||||
|
||||
static void check_for_space(struct pool *pool)
|
||||
static bool is_read_only_pool_mode(enum pool_mode mode)
|
||||
{
|
||||
return (mode == PM_OUT_OF_METADATA_SPACE || mode == PM_READ_ONLY);
|
||||
}
|
||||
|
||||
static bool is_read_only(struct pool *pool)
|
||||
{
|
||||
return is_read_only_pool_mode(get_pool_mode(pool));
|
||||
}
|
||||
|
||||
static void check_for_metadata_space(struct pool *pool)
|
||||
{
|
||||
int r;
|
||||
const char *ooms_reason = NULL;
|
||||
dm_block_t nr_free;
|
||||
|
||||
r = dm_pool_get_free_metadata_block_count(pool->pmd, &nr_free);
|
||||
if (r)
|
||||
ooms_reason = "Could not get free metadata blocks";
|
||||
else if (!nr_free)
|
||||
ooms_reason = "No free metadata blocks";
|
||||
|
||||
if (ooms_reason && !is_read_only(pool)) {
|
||||
DMERR("%s", ooms_reason);
|
||||
set_pool_mode(pool, PM_OUT_OF_METADATA_SPACE);
|
||||
}
|
||||
}
|
||||
|
||||
static void check_for_data_space(struct pool *pool)
|
||||
{
|
||||
int r;
|
||||
dm_block_t nr_free;
|
||||
|
@ -1397,14 +1431,16 @@ static int commit(struct pool *pool)
|
|||
{
|
||||
int r;
|
||||
|
||||
if (get_pool_mode(pool) >= PM_READ_ONLY)
|
||||
if (get_pool_mode(pool) >= PM_OUT_OF_METADATA_SPACE)
|
||||
return -EINVAL;
|
||||
|
||||
r = dm_pool_commit_metadata(pool->pmd);
|
||||
if (r)
|
||||
metadata_operation_failed(pool, "dm_pool_commit_metadata", r);
|
||||
else
|
||||
check_for_space(pool);
|
||||
else {
|
||||
check_for_metadata_space(pool);
|
||||
check_for_data_space(pool);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
@ -1470,6 +1506,19 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result)
|
|||
return r;
|
||||
}
|
||||
|
||||
r = dm_pool_get_free_metadata_block_count(pool->pmd, &free_blocks);
|
||||
if (r) {
|
||||
metadata_operation_failed(pool, "dm_pool_get_free_metadata_block_count", r);
|
||||
return r;
|
||||
}
|
||||
|
||||
if (!free_blocks) {
|
||||
/* Let's commit before we use up the metadata reserve. */
|
||||
r = commit(pool);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1501,6 +1550,7 @@ static blk_status_t should_error_unserviceable_bio(struct pool *pool)
|
|||
case PM_OUT_OF_DATA_SPACE:
|
||||
return pool->pf.error_if_no_space ? BLK_STS_NOSPC : 0;
|
||||
|
||||
case PM_OUT_OF_METADATA_SPACE:
|
||||
case PM_READ_ONLY:
|
||||
case PM_FAIL:
|
||||
return BLK_STS_IOERR;
|
||||
|
@ -2464,8 +2514,9 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
|
|||
error_retry_list(pool);
|
||||
break;
|
||||
|
||||
case PM_OUT_OF_METADATA_SPACE:
|
||||
case PM_READ_ONLY:
|
||||
if (old_mode != new_mode)
|
||||
if (!is_read_only_pool_mode(old_mode))
|
||||
notify_of_pool_mode_change(pool, "read-only");
|
||||
dm_pool_metadata_read_only(pool->pmd);
|
||||
pool->process_bio = process_bio_read_only;
|
||||
|
@ -3403,6 +3454,10 @@ static int maybe_resize_metadata_dev(struct dm_target *ti, bool *need_commit)
|
|||
DMINFO("%s: growing the metadata device from %llu to %llu blocks",
|
||||
dm_device_name(pool->pool_md),
|
||||
sb_metadata_dev_size, metadata_dev_size);
|
||||
|
||||
if (get_pool_mode(pool) == PM_OUT_OF_METADATA_SPACE)
|
||||
set_pool_mode(pool, PM_WRITE);
|
||||
|
||||
r = dm_pool_resize_metadata_dev(pool->pmd, metadata_dev_size);
|
||||
if (r) {
|
||||
metadata_operation_failed(pool, "dm_pool_resize_metadata_dev", r);
|
||||
|
@ -3707,7 +3762,7 @@ static int pool_message(struct dm_target *ti, unsigned argc, char **argv,
|
|||
struct pool_c *pt = ti->private;
|
||||
struct pool *pool = pt->pool;
|
||||
|
||||
if (get_pool_mode(pool) >= PM_READ_ONLY) {
|
||||
if (get_pool_mode(pool) >= PM_OUT_OF_METADATA_SPACE) {
|
||||
DMERR("%s: unable to service pool target messages in READ_ONLY or FAIL mode",
|
||||
dm_device_name(pool->pool_md));
|
||||
return -EOPNOTSUPP;
|
||||
|
@ -3781,6 +3836,7 @@ static void pool_status(struct dm_target *ti, status_type_t type,
|
|||
dm_block_t nr_blocks_data;
|
||||
dm_block_t nr_blocks_metadata;
|
||||
dm_block_t held_root;
|
||||
enum pool_mode mode;
|
||||
char buf[BDEVNAME_SIZE];
|
||||
char buf2[BDEVNAME_SIZE];
|
||||
struct pool_c *pt = ti->private;
|
||||
|
@ -3851,9 +3907,10 @@ static void pool_status(struct dm_target *ti, status_type_t type,
|
|||
else
|
||||
DMEMIT("- ");
|
||||
|
||||
if (pool->pf.mode == PM_OUT_OF_DATA_SPACE)
|
||||
mode = get_pool_mode(pool);
|
||||
if (mode == PM_OUT_OF_DATA_SPACE)
|
||||
DMEMIT("out_of_data_space ");
|
||||
else if (pool->pf.mode == PM_READ_ONLY)
|
||||
else if (is_read_only_pool_mode(mode))
|
||||
DMEMIT("ro ");
|
||||
else
|
||||
DMEMIT("rw ");
|
||||
|
|
|
@ -99,10 +99,26 @@ static int verity_hash_update(struct dm_verity *v, struct ahash_request *req,
|
|||
{
|
||||
struct scatterlist sg;
|
||||
|
||||
sg_init_one(&sg, data, len);
|
||||
ahash_request_set_crypt(req, &sg, NULL, len);
|
||||
|
||||
return crypto_wait_req(crypto_ahash_update(req), wait);
|
||||
if (likely(!is_vmalloc_addr(data))) {
|
||||
sg_init_one(&sg, data, len);
|
||||
ahash_request_set_crypt(req, &sg, NULL, len);
|
||||
return crypto_wait_req(crypto_ahash_update(req), wait);
|
||||
} else {
|
||||
do {
|
||||
int r;
|
||||
size_t this_step = min_t(size_t, len, PAGE_SIZE - offset_in_page(data));
|
||||
flush_kernel_vmap_range((void *)data, this_step);
|
||||
sg_init_table(&sg, 1);
|
||||
sg_set_page(&sg, vmalloc_to_page(data), this_step, offset_in_page(data));
|
||||
ahash_request_set_crypt(req, &sg, NULL, this_step);
|
||||
r = crypto_wait_req(crypto_ahash_update(req), wait);
|
||||
if (unlikely(r))
|
||||
return r;
|
||||
data += this_step;
|
||||
len -= this_step;
|
||||
} while (len);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
Loading…
Reference in New Issue