dm integrity: add optional discard support
Add an argument "allow_discards" that enables discard processing on dm-integrity device. Discards are only allowed to devices using internal hash. When a block is discarded the integrity tag is filled with DISCARD_FILLER (0xf6) bytes. Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
This commit is contained in:
parent
1ac2c15a7b
commit
84597a44a9
|
@ -39,6 +39,7 @@
|
|||
#define RECALC_WRITE_SUPER 16
|
||||
#define BITMAP_BLOCK_SIZE 4096 /* don't change it */
|
||||
#define BITMAP_FLUSH_INTERVAL (10 * HZ)
|
||||
#define DISCARD_FILLER 0xf6
|
||||
|
||||
/*
|
||||
* Warning - DEBUG_PRINT prints security-sensitive data to the log,
|
||||
|
@ -257,6 +258,7 @@ struct dm_integrity_c {
|
|||
bool just_formatted;
|
||||
bool recalculate_flag;
|
||||
bool fix_padding;
|
||||
bool discard;
|
||||
|
||||
struct alg_spec internal_hash_alg;
|
||||
struct alg_spec journal_crypt_alg;
|
||||
|
@ -284,7 +286,7 @@ struct dm_integrity_io {
|
|||
struct work_struct work;
|
||||
|
||||
struct dm_integrity_c *ic;
|
||||
bool write;
|
||||
enum req_opf op;
|
||||
bool fua;
|
||||
|
||||
struct dm_integrity_range range;
|
||||
|
@ -1299,6 +1301,11 @@ static bool find_newer_committed_node(struct dm_integrity_c *ic, struct journal_
|
|||
static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, sector_t *metadata_block,
|
||||
unsigned *metadata_offset, unsigned total_size, int op)
|
||||
{
|
||||
#define MAY_BE_FILLER 1
|
||||
#define MAY_BE_HASH 2
|
||||
unsigned hash_offset = 0;
|
||||
unsigned may_be = MAY_BE_HASH | (ic->discard ? MAY_BE_FILLER : 0);
|
||||
|
||||
do {
|
||||
unsigned char *data, *dp;
|
||||
struct dm_buffer *b;
|
||||
|
@ -1320,18 +1327,35 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se
|
|||
} else if (op == TAG_WRITE) {
|
||||
memcpy(dp, tag, to_copy);
|
||||
dm_bufio_mark_partial_buffer_dirty(b, *metadata_offset, *metadata_offset + to_copy);
|
||||
} else {
|
||||
} else {
|
||||
/* e.g.: op == TAG_CMP */
|
||||
if (unlikely(memcmp(dp, tag, to_copy))) {
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < to_copy; i++) {
|
||||
if (dp[i] != tag[i])
|
||||
break;
|
||||
total_size--;
|
||||
if (likely(is_power_of_2(ic->tag_size))) {
|
||||
if (unlikely(memcmp(dp, tag, to_copy)))
|
||||
if (unlikely(!ic->discard) ||
|
||||
unlikely(!memchr_inv(dp, DISCARD_FILLER, to_copy))) {
|
||||
goto thorough_test;
|
||||
}
|
||||
} else {
|
||||
unsigned i, ts;
|
||||
thorough_test:
|
||||
ts = total_size;
|
||||
|
||||
for (i = 0; i < to_copy; i++, ts--) {
|
||||
if (unlikely(dp[i] != tag[i]))
|
||||
may_be &= ~MAY_BE_HASH;
|
||||
if (likely(dp[i] != DISCARD_FILLER))
|
||||
may_be &= ~MAY_BE_FILLER;
|
||||
hash_offset++;
|
||||
if (unlikely(hash_offset == ic->tag_size)) {
|
||||
if (unlikely(!may_be)) {
|
||||
dm_bufio_release(b);
|
||||
return ts;
|
||||
}
|
||||
hash_offset = 0;
|
||||
may_be = MAY_BE_HASH | (ic->discard ? MAY_BE_FILLER : 0);
|
||||
}
|
||||
}
|
||||
dm_bufio_release(b);
|
||||
return total_size;
|
||||
}
|
||||
}
|
||||
dm_bufio_release(b);
|
||||
|
@ -1342,10 +1366,17 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se
|
|||
(*metadata_block)++;
|
||||
*metadata_offset = 0;
|
||||
}
|
||||
|
||||
if (unlikely(!is_power_of_2(ic->tag_size))) {
|
||||
hash_offset = (hash_offset + to_copy) % ic->tag_size;
|
||||
}
|
||||
|
||||
total_size -= to_copy;
|
||||
} while (unlikely(total_size));
|
||||
|
||||
return 0;
|
||||
#undef MAY_BE_FILLER
|
||||
#undef MAY_BE_HASH
|
||||
}
|
||||
|
||||
static void dm_integrity_flush_buffers(struct dm_integrity_c *ic)
|
||||
|
@ -1428,7 +1459,7 @@ static void dec_in_flight(struct dm_integrity_io *dio)
|
|||
|
||||
remove_range(ic, &dio->range);
|
||||
|
||||
if (unlikely(dio->write))
|
||||
if (dio->op == REQ_OP_WRITE || unlikely(dio->op == REQ_OP_DISCARD))
|
||||
schedule_autocommit(ic);
|
||||
|
||||
bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
|
||||
|
@ -1520,14 +1551,19 @@ static void integrity_metadata(struct work_struct *w)
|
|||
char *checksums;
|
||||
unsigned extra_space = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0;
|
||||
char checksums_onstack[max((size_t)HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)];
|
||||
unsigned sectors_to_process = dio->range.n_sectors;
|
||||
sector_t sector = dio->range.logical_sector;
|
||||
sector_t sector;
|
||||
unsigned sectors_to_process;
|
||||
sector_t save_metadata_block;
|
||||
unsigned save_metadata_offset;
|
||||
|
||||
if (unlikely(ic->mode == 'R'))
|
||||
goto skip_io;
|
||||
|
||||
checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
|
||||
GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN);
|
||||
if (likely(dio->op != REQ_OP_DISCARD))
|
||||
checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
|
||||
GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN);
|
||||
else
|
||||
checksums = kmalloc(PAGE_SIZE, GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN);
|
||||
if (!checksums) {
|
||||
checksums = checksums_onstack;
|
||||
if (WARN_ON(extra_space &&
|
||||
|
@ -1537,6 +1573,43 @@ static void integrity_metadata(struct work_struct *w)
|
|||
}
|
||||
}
|
||||
|
||||
if (unlikely(dio->op == REQ_OP_DISCARD)) {
|
||||
sector_t bi_sector = dio->bio_details.bi_iter.bi_sector;
|
||||
unsigned bi_size = dio->bio_details.bi_iter.bi_size;
|
||||
unsigned max_size = likely(checksums != checksums_onstack) ? PAGE_SIZE : HASH_MAX_DIGESTSIZE;
|
||||
unsigned max_blocks = max_size / ic->tag_size;
|
||||
memset(checksums, DISCARD_FILLER, max_size);
|
||||
|
||||
while (bi_size) {
|
||||
unsigned this_step_blocks = bi_size >> (SECTOR_SHIFT + ic->sb->log2_sectors_per_block);
|
||||
this_step_blocks = min(this_step_blocks, max_blocks);
|
||||
r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
|
||||
this_step_blocks * ic->tag_size, TAG_WRITE);
|
||||
if (unlikely(r)) {
|
||||
if (likely(checksums != checksums_onstack))
|
||||
kfree(checksums);
|
||||
goto error;
|
||||
}
|
||||
|
||||
/*if (bi_size < this_step_blocks << (SECTOR_SHIFT + ic->sb->log2_sectors_per_block)) {
|
||||
printk("BUGG: bi_sector: %llx, bi_size: %u\n", bi_sector, bi_size);
|
||||
printk("BUGG: this_step_blocks: %u\n", this_step_blocks);
|
||||
BUG();
|
||||
}*/
|
||||
bi_size -= this_step_blocks << (SECTOR_SHIFT + ic->sb->log2_sectors_per_block);
|
||||
bi_sector += this_step_blocks << ic->sb->log2_sectors_per_block;
|
||||
}
|
||||
|
||||
if (likely(checksums != checksums_onstack))
|
||||
kfree(checksums);
|
||||
goto skip_io;
|
||||
}
|
||||
|
||||
save_metadata_block = dio->metadata_block;
|
||||
save_metadata_offset = dio->metadata_offset;
|
||||
sector = dio->range.logical_sector;
|
||||
sectors_to_process = dio->range.n_sectors;
|
||||
|
||||
__bio_for_each_segment(bv, bio, iter, dio->bio_details.bi_iter) {
|
||||
unsigned pos;
|
||||
char *mem, *checksums_ptr;
|
||||
|
@ -1555,7 +1628,7 @@ again:
|
|||
kunmap_atomic(mem);
|
||||
|
||||
r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
|
||||
checksums_ptr - checksums, !dio->write ? TAG_CMP : TAG_WRITE);
|
||||
checksums_ptr - checksums, dio->op == REQ_OP_READ ? TAG_CMP : TAG_WRITE);
|
||||
if (unlikely(r)) {
|
||||
if (r > 0) {
|
||||
char b[BDEVNAME_SIZE];
|
||||
|
@ -1599,7 +1672,7 @@ again:
|
|||
tag = lowmem_page_address(biv.bv_page) + biv.bv_offset;
|
||||
this_len = min(biv.bv_len, data_to_process);
|
||||
r = dm_integrity_rw_tag(ic, tag, &dio->metadata_block, &dio->metadata_offset,
|
||||
this_len, !dio->write ? TAG_READ : TAG_WRITE);
|
||||
this_len, dio->op == REQ_OP_READ ? TAG_READ : TAG_WRITE);
|
||||
if (unlikely(r))
|
||||
goto error;
|
||||
data_to_process -= this_len;
|
||||
|
@ -1626,6 +1699,20 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
|
|||
|
||||
dio->ic = ic;
|
||||
dio->bi_status = 0;
|
||||
dio->op = bio_op(bio);
|
||||
|
||||
if (unlikely(dio->op == REQ_OP_DISCARD)) {
|
||||
if (ti->max_io_len) {
|
||||
sector_t sec = dm_target_offset(ti, bio->bi_iter.bi_sector);
|
||||
unsigned log2_max_io_len = __fls(ti->max_io_len);
|
||||
sector_t start_boundary = sec >> log2_max_io_len;
|
||||
sector_t end_boundary = (sec + bio_sectors(bio) - 1) >> log2_max_io_len;
|
||||
if (start_boundary < end_boundary) {
|
||||
sector_t len = ti->max_io_len - (sec & (ti->max_io_len - 1));
|
||||
dm_accept_partial_bio(bio, len);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
|
||||
submit_flush_bio(ic, dio);
|
||||
|
@ -1633,8 +1720,7 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
|
|||
}
|
||||
|
||||
dio->range.logical_sector = dm_target_offset(ti, bio->bi_iter.bi_sector);
|
||||
dio->write = bio_op(bio) == REQ_OP_WRITE;
|
||||
dio->fua = dio->write && bio->bi_opf & REQ_FUA;
|
||||
dio->fua = dio->op == REQ_OP_WRITE && bio->bi_opf & REQ_FUA;
|
||||
if (unlikely(dio->fua)) {
|
||||
/*
|
||||
* Don't pass down the FUA flag because we have to flush
|
||||
|
@ -1655,7 +1741,7 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
|
|||
return DM_MAPIO_KILL;
|
||||
}
|
||||
|
||||
if (ic->sectors_per_block > 1) {
|
||||
if (ic->sectors_per_block > 1 && likely(dio->op != REQ_OP_DISCARD)) {
|
||||
struct bvec_iter iter;
|
||||
struct bio_vec bv;
|
||||
bio_for_each_segment(bv, bio, iter) {
|
||||
|
@ -1688,7 +1774,7 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
|
|||
}
|
||||
}
|
||||
|
||||
if (unlikely(ic->mode == 'R') && unlikely(dio->write))
|
||||
if (unlikely(ic->mode == 'R') && unlikely(dio->op != REQ_OP_READ))
|
||||
return DM_MAPIO_KILL;
|
||||
|
||||
get_area_and_offset(ic, dio->range.logical_sector, &area, &offset);
|
||||
|
@ -1718,13 +1804,13 @@ static bool __journal_read_write(struct dm_integrity_io *dio, struct bio *bio,
|
|||
bio_advance_iter(bio, &bio->bi_iter, bv.bv_len);
|
||||
retry_kmap:
|
||||
mem = kmap_atomic(bv.bv_page);
|
||||
if (likely(dio->write))
|
||||
if (likely(dio->op == REQ_OP_WRITE))
|
||||
flush_dcache_page(bv.bv_page);
|
||||
|
||||
do {
|
||||
struct journal_entry *je = access_journal_entry(ic, journal_section, journal_entry);
|
||||
|
||||
if (unlikely(!dio->write)) {
|
||||
if (unlikely(dio->op == REQ_OP_READ)) {
|
||||
struct journal_sector *js;
|
||||
char *mem_ptr;
|
||||
unsigned s;
|
||||
|
@ -1771,7 +1857,7 @@ retry_kmap:
|
|||
char *tag_addr;
|
||||
BUG_ON(PageHighMem(biv.bv_page));
|
||||
tag_addr = lowmem_page_address(biv.bv_page) + biv.bv_offset;
|
||||
if (likely(dio->write))
|
||||
if (likely(dio->op == REQ_OP_WRITE))
|
||||
memcpy(tag_ptr, tag_addr, tag_now);
|
||||
else
|
||||
memcpy(tag_addr, tag_ptr, tag_now);
|
||||
|
@ -1779,12 +1865,12 @@ retry_kmap:
|
|||
tag_ptr += tag_now;
|
||||
tag_todo -= tag_now;
|
||||
} while (unlikely(tag_todo)); else {
|
||||
if (likely(dio->write))
|
||||
if (likely(dio->op == REQ_OP_WRITE))
|
||||
memset(tag_ptr, 0, tag_todo);
|
||||
}
|
||||
}
|
||||
|
||||
if (likely(dio->write)) {
|
||||
if (likely(dio->op == REQ_OP_WRITE)) {
|
||||
struct journal_sector *js;
|
||||
unsigned s;
|
||||
|
||||
|
@ -1820,12 +1906,12 @@ retry_kmap:
|
|||
bv.bv_offset += ic->sectors_per_block << SECTOR_SHIFT;
|
||||
} while (bv.bv_len -= ic->sectors_per_block << SECTOR_SHIFT);
|
||||
|
||||
if (unlikely(!dio->write))
|
||||
if (unlikely(dio->op == REQ_OP_READ))
|
||||
flush_dcache_page(bv.bv_page);
|
||||
kunmap_atomic(mem);
|
||||
} while (n_sectors);
|
||||
|
||||
if (likely(dio->write)) {
|
||||
if (likely(dio->op == REQ_OP_WRITE)) {
|
||||
smp_mb();
|
||||
if (unlikely(waitqueue_active(&ic->copy_to_journal_wait)))
|
||||
wake_up(&ic->copy_to_journal_wait);
|
||||
|
@ -1857,7 +1943,9 @@ static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map
|
|||
unsigned journal_section, journal_entry;
|
||||
unsigned journal_read_pos;
|
||||
struct completion read_comp;
|
||||
bool need_sync_io = ic->internal_hash && !dio->write;
|
||||
bool need_sync_io = ic->internal_hash && dio->op == REQ_OP_READ;
|
||||
if (unlikely(dio->op == REQ_OP_DISCARD) && ic->mode != 'D')
|
||||
need_sync_io = true;
|
||||
|
||||
if (need_sync_io && from_map) {
|
||||
INIT_WORK(&dio->work, integrity_bio_wait);
|
||||
|
@ -1875,8 +1963,8 @@ retry:
|
|||
}
|
||||
dio->range.n_sectors = bio_sectors(bio);
|
||||
journal_read_pos = NOT_FOUND;
|
||||
if (likely(ic->mode == 'J')) {
|
||||
if (dio->write) {
|
||||
if (ic->mode == 'J' && likely(dio->op != REQ_OP_DISCARD)) {
|
||||
if (dio->op == REQ_OP_WRITE) {
|
||||
unsigned next_entry, i, pos;
|
||||
unsigned ws, we, range_sectors;
|
||||
|
||||
|
@ -1979,7 +2067,7 @@ offload_to_thread:
|
|||
goto journal_read_write;
|
||||
}
|
||||
|
||||
if (ic->mode == 'B' && dio->write) {
|
||||
if (ic->mode == 'B' && (dio->op == REQ_OP_WRITE || unlikely(dio->op == REQ_OP_DISCARD))) {
|
||||
if (!block_bitmap_op(ic, ic->may_write_bitmap, dio->range.logical_sector,
|
||||
dio->range.n_sectors, BITMAP_OP_TEST_ALL_SET)) {
|
||||
struct bitmap_block_status *bbs;
|
||||
|
@ -2008,6 +2096,18 @@ offload_to_thread:
|
|||
bio->bi_end_io = integrity_end_io;
|
||||
bio->bi_iter.bi_size = dio->range.n_sectors << SECTOR_SHIFT;
|
||||
|
||||
if (unlikely(dio->op == REQ_OP_DISCARD) && likely(ic->mode != 'D')) {
|
||||
integrity_metadata(&dio->work);
|
||||
dm_integrity_flush_buffers(ic);
|
||||
|
||||
dio->in_flight = (atomic_t)ATOMIC_INIT(1);
|
||||
dio->completion = NULL;
|
||||
|
||||
generic_make_request(bio);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
generic_make_request(bio);
|
||||
|
||||
if (need_sync_io) {
|
||||
|
@ -2969,6 +3069,7 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type,
|
|||
arg_count += !!ic->meta_dev;
|
||||
arg_count += ic->sectors_per_block != 1;
|
||||
arg_count += !!(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING));
|
||||
arg_count += ic->discard;
|
||||
arg_count += ic->mode == 'J';
|
||||
arg_count += ic->mode == 'J';
|
||||
arg_count += ic->mode == 'B';
|
||||
|
@ -2985,6 +3086,8 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type,
|
|||
DMEMIT(" block_size:%u", ic->sectors_per_block << SECTOR_SHIFT);
|
||||
if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))
|
||||
DMEMIT(" recalculate");
|
||||
if (ic->discard)
|
||||
DMEMIT(" allow_discards");
|
||||
DMEMIT(" journal_sectors:%u", ic->initial_sectors - SB_SECTORS);
|
||||
DMEMIT(" interleave_sectors:%u", 1U << ic->sb->log2_interleave_sectors);
|
||||
DMEMIT(" buffer_sectors:%u", 1U << ic->log2_buffer_sectors);
|
||||
|
@ -3771,6 +3874,8 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
|
|||
goto bad;
|
||||
} else if (!strcmp(opt_string, "recalculate")) {
|
||||
ic->recalculate_flag = true;
|
||||
} else if (!strcmp(opt_string, "allow_discards")) {
|
||||
ic->discard = true;
|
||||
} else if (!strcmp(opt_string, "fix_padding")) {
|
||||
ic->fix_padding = true;
|
||||
} else {
|
||||
|
@ -3829,6 +3934,12 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
|
|||
goto bad;
|
||||
}
|
||||
|
||||
if (ic->discard && !ic->internal_hash) {
|
||||
r = -EINVAL;
|
||||
ti->error = "Discard can be only used with internal hash";
|
||||
goto bad;
|
||||
}
|
||||
|
||||
ic->autocommit_jiffies = msecs_to_jiffies(sync_msec);
|
||||
ic->autocommit_msec = sync_msec;
|
||||
timer_setup(&ic->autocommit_timer, autocommit_fn, 0);
|
||||
|
@ -4158,6 +4269,8 @@ try_smaller_buffer:
|
|||
|
||||
ti->num_flush_bios = 1;
|
||||
ti->flush_supported = true;
|
||||
if (ic->discard)
|
||||
ti->num_discard_bios = 1;
|
||||
|
||||
return 0;
|
||||
|
||||
|
|
Loading…
Reference in New Issue