Merge git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm

commit 3e5cce627c

* git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm:
  dm: tidy local_init
  dm: remove unused flush_all
  dm raid1: separate region_hash interface part1
  dm: mark split bio as cloned
  dm crypt: remove waitqueue
  dm crypt: fix async split
  dm crypt: tidy sector
  dm: remove dm header from targets
  dm: publish array_too_big
  dm exception store: fix misordered writes
  dm exception store: refactor zero_area
  dm snapshot: drop unused last_percent
  dm snapshot: fix primary_pe race
  dm kcopyd: avoid queue shuffle
drivers/md/Makefile
@@ -34,7 +34,7 @@ obj-$(CONFIG_DM_CRYPT) += dm-crypt.o
 obj-$(CONFIG_DM_DELAY) += dm-delay.o
 obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o
 obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o
-obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o
+obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o dm-region-hash.o
 obj-$(CONFIG_DM_ZERO) += dm-zero.o
 
 quiet_cmd_unroll = UNROLL $@
drivers/md/dm-crypt.c
@@ -23,7 +23,7 @@
 #include <asm/page.h>
 #include <asm/unaligned.h>
 
-#include "dm.h"
+#include <linux/device-mapper.h>
 
 #define DM_MSG_PREFIX "crypt"
 #define MESG_STR(x) x, sizeof(x)
@@ -56,6 +56,7 @@ struct dm_crypt_io {
 	atomic_t pending;
 	int error;
 	sector_t sector;
+	struct dm_crypt_io *base_io;
 };
 
 struct dm_crypt_request {
@@ -93,7 +94,6 @@ struct crypt_config {
 
 	struct workqueue_struct *io_queue;
 	struct workqueue_struct *crypt_queue;
-	wait_queue_head_t writeq;
 
 	/*
 	 * crypto related data
@@ -534,6 +534,7 @@ static struct dm_crypt_io *crypt_io_alloc(struct dm_target *ti,
 	io->base_bio = bio;
 	io->sector = sector;
 	io->error = 0;
+	io->base_io = NULL;
 	atomic_set(&io->pending, 0);
 
 	return io;
@@ -547,6 +548,7 @@ static void crypt_inc_pending(struct dm_crypt_io *io)
 /*
  * One of the bios was finished. Check for completion of
  * the whole request and correctly clean up the buffer.
+ * If base_io is set, wait for the last fragment to complete.
  */
 static void crypt_dec_pending(struct dm_crypt_io *io)
 {
@@ -555,7 +557,14 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
 	if (!atomic_dec_and_test(&io->pending))
 		return;
 
-	bio_endio(io->base_bio, io->error);
+	if (likely(!io->base_io))
+		bio_endio(io->base_bio, io->error);
+	else {
+		if (io->error && !io->base_io->error)
+			io->base_io->error = io->error;
+		crypt_dec_pending(io->base_io);
+	}
+
 	mempool_free(io, cc->io_pool);
 }
 
@@ -646,10 +655,7 @@ static void kcryptd_io_read(struct dm_crypt_io *io)
 static void kcryptd_io_write(struct dm_crypt_io *io)
 {
 	struct bio *clone = io->ctx.bio_out;
-	struct crypt_config *cc = io->target->private;
-
 	generic_make_request(clone);
-	wake_up(&cc->writeq);
 }
 
 static void kcryptd_io(struct work_struct *work)
@@ -688,7 +694,6 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io,
 	BUG_ON(io->ctx.idx_out < clone->bi_vcnt);
 
 	clone->bi_sector = cc->start + io->sector;
-	io->sector += bio_sectors(clone);
 
 	if (async)
 		kcryptd_queue_io(io);
@@ -700,16 +705,18 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 {
 	struct crypt_config *cc = io->target->private;
 	struct bio *clone;
+	struct dm_crypt_io *new_io;
 	int crypt_finished;
 	unsigned out_of_pages = 0;
 	unsigned remaining = io->base_bio->bi_size;
+	sector_t sector = io->sector;
 	int r;
 
 	/*
 	 * Prevent io from disappearing until this function completes.
 	 */
 	crypt_inc_pending(io);
-	crypt_convert_init(cc, &io->ctx, NULL, io->base_bio, io->sector);
+	crypt_convert_init(cc, &io->ctx, NULL, io->base_bio, sector);
 
 	/*
 	 * The allocated buffers can be smaller than the whole bio,
@@ -726,6 +733,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 		io->ctx.idx_out = 0;
 
 		remaining -= clone->bi_size;
+		sector += bio_sectors(clone);
 
 		crypt_inc_pending(io);
 		r = crypt_convert(cc, &io->ctx);
@@ -741,6 +749,8 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 		 */
 		if (unlikely(r < 0))
 			break;
+
+		io->sector = sector;
 	}
 
 	/*
@@ -750,8 +760,33 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 		if (unlikely(out_of_pages))
 			congestion_wait(WRITE, HZ/100);
 
-		if (unlikely(remaining))
-			wait_event(cc->writeq, !atomic_read(&io->ctx.pending));
+		/*
+		 * With async crypto it is unsafe to share the crypto context
+		 * between fragments, so switch to a new dm_crypt_io structure.
+		 */
+		if (unlikely(!crypt_finished && remaining)) {
+			new_io = crypt_io_alloc(io->target, io->base_bio,
+						sector);
+			crypt_inc_pending(new_io);
+			crypt_convert_init(cc, &new_io->ctx, NULL,
+					   io->base_bio, sector);
+			new_io->ctx.idx_in = io->ctx.idx_in;
+			new_io->ctx.offset_in = io->ctx.offset_in;
+
+			/*
+			 * Fragments after the first use the base_io
+			 * pending count.
+			 */
+			if (!io->base_io)
+				new_io->base_io = io;
+			else {
+				new_io->base_io = io->base_io;
+				crypt_inc_pending(io->base_io);
+				crypt_dec_pending(io);
+			}
+
+			io = new_io;
+		}
 	}
 
 	crypt_dec_pending(io);
@@ -1078,7 +1113,6 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad_crypt_queue;
 	}
 
-	init_waitqueue_head(&cc->writeq);
 	ti->private = cc;
 	return 0;
 
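Note: the base_io scheme above gives each additional fragment of a split async write its own dm_crypt_io that holds a reference on the first one; the original bio completes only when the base request's pending count reaches zero, and the first error seen wins. A minimal user-space sketch of the same completion pattern (the names and main() are illustrative, not dm-crypt code):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

/* Stand-in for dm_crypt_io: a request fragment with a pending count
 * and an optional pointer to the base (first) fragment. */
struct frag {
	atomic_int pending;
	int error;
	struct frag *base;	/* NULL for the base fragment itself */
};

static void frag_dec_pending(struct frag *f)
{
	if (atomic_fetch_sub(&f->pending, 1) != 1)
		return;			/* references still in flight */

	if (!f->base)
		printf("base done, error=%d\n", f->error);	/* "bio_endio" */
	else {
		/* propagate the first error seen to the base fragment */
		if (f->error && !f->base->error)
			f->base->error = f->error;
		frag_dec_pending(f->base);	/* drop our ref on the base */
	}
	free(f);
}

int main(void)
{
	struct frag *base = calloc(1, sizeof(*base));
	struct frag *extra = calloc(1, sizeof(*extra));

	atomic_store(&base->pending, 2);	/* its own I/O + extra's ref */
	atomic_store(&extra->pending, 1);
	extra->base = base;

	extra->error = -5;		/* pretend the second fragment failed */
	frag_dec_pending(extra);	/* completes extra, drops ref on base */
	frag_dec_pending(base);		/* base now reports error=-5 */
	return 0;
}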
drivers/md/dm-delay.c
@@ -13,7 +13,8 @@
 #include <linux/bio.h>
 #include <linux/slab.h>
 
-#include "dm.h"
+#include <linux/device-mapper.h>
+
 #include "dm-bio-list.h"
 
 #define DM_MSG_PREFIX "delay"
drivers/md/dm-exception-store.c
@@ -7,7 +7,6 @@
  * This file is released under the GPL.
  */
 
-#include "dm.h"
 #include "dm-snap.h"
 
 #include <linux/mm.h>
@@ -104,6 +103,11 @@ struct pstore {
 	 */
 	void *area;
 
+	/*
+	 * An area of zeros used to clear the next area.
+	 */
+	void *zero_area;
+
 	/*
 	 * Used to keep track of which metadata area the data in
 	 * 'chunk' refers to.
@@ -149,6 +153,13 @@ static int alloc_area(struct pstore *ps)
 	if (!ps->area)
 		return r;
 
+	ps->zero_area = vmalloc(len);
+	if (!ps->zero_area) {
+		vfree(ps->area);
+		return r;
+	}
+	memset(ps->zero_area, 0, len);
+
 	return 0;
 }
 
@@ -156,6 +167,8 @@ static void free_area(struct pstore *ps)
 {
 	vfree(ps->area);
 	ps->area = NULL;
+	vfree(ps->zero_area);
+	ps->zero_area = NULL;
 }
 
 struct mdata_req {
@@ -220,25 +233,41 @@ static chunk_t area_location(struct pstore *ps, chunk_t area)
  * Read or write a metadata area. Remembering to skip the first
  * chunk which holds the header.
  */
-static int area_io(struct pstore *ps, chunk_t area, int rw)
+static int area_io(struct pstore *ps, int rw)
 {
 	int r;
 	chunk_t chunk;
 
-	chunk = area_location(ps, area);
+	chunk = area_location(ps, ps->current_area);
 
 	r = chunk_io(ps, chunk, rw, 0);
 	if (r)
 		return r;
 
-	ps->current_area = area;
 	return 0;
 }
 
-static int zero_area(struct pstore *ps, chunk_t area)
+static void zero_memory_area(struct pstore *ps)
 {
 	memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
-	return area_io(ps, area, WRITE);
+}
+
+static int zero_disk_area(struct pstore *ps, chunk_t area)
+{
+	struct dm_io_region where = {
+		.bdev = ps->snap->cow->bdev,
+		.sector = ps->snap->chunk_size * area_location(ps, area),
+		.count = ps->snap->chunk_size,
+	};
+	struct dm_io_request io_req = {
+		.bi_rw = WRITE,
+		.mem.type = DM_IO_VMA,
+		.mem.ptr.vma = ps->zero_area,
+		.client = ps->io_client,
+		.notify.fn = NULL,
+	};
+
+	return dm_io(&io_req, 1, &where, NULL);
 }
 
 static int read_header(struct pstore *ps, int *new_snapshot)
@@ -411,15 +440,14 @@ static int insert_exceptions(struct pstore *ps, int *full)
 
 static int read_exceptions(struct pstore *ps)
 {
-	chunk_t area;
 	int r, full = 1;
 
 	/*
 	 * Keeping reading chunks and inserting exceptions until
 	 * we find a partially full area.
 	 */
-	for (area = 0; full; area++) {
-		r = area_io(ps, area, READ);
+	for (ps->current_area = 0; full; ps->current_area++) {
+		r = area_io(ps, READ);
 		if (r)
 			return r;
 
@@ -428,6 +456,8 @@ static int read_exceptions(struct pstore *ps)
 			return r;
 	}
 
+	ps->current_area--;
+
 	return 0;
 }
 
@@ -486,12 +516,13 @@ static int persistent_read_metadata(struct exception_store *store)
 			return r;
 		}
 
-		r = zero_area(ps, 0);
+		ps->current_area = 0;
+		zero_memory_area(ps);
+		r = zero_disk_area(ps, 0);
 		if (r) {
-			DMWARN("zero_area(0) failed");
+			DMWARN("zero_disk_area(0) failed");
 			return r;
 		}
-
 	} else {
 		/*
 		 * Sanity checks.
@@ -551,7 +582,6 @@ static void persistent_commit(struct exception_store *store,
 			      void (*callback) (void *, int success),
 			      void *callback_context)
 {
-	int r;
 	unsigned int i;
 	struct pstore *ps = get_info(store);
 	struct disk_exception de;
@@ -572,33 +602,41 @@ static void persistent_commit(struct exception_store *store,
 	cb->context = callback_context;
 
 	/*
-	 * If there are no more exceptions in flight, or we have
-	 * filled this metadata area we commit the exceptions to
-	 * disk.
+	 * If there are exceptions in flight and we have not yet
+	 * filled this metadata area there's nothing more to do.
 	 */
-	if (atomic_dec_and_test(&ps->pending_count) ||
-	    (ps->current_committed == ps->exceptions_per_area)) {
-		r = area_io(ps, ps->current_area, WRITE);
-		if (r)
-			ps->valid = 0;
+	if (!atomic_dec_and_test(&ps->pending_count) &&
+	    (ps->current_committed != ps->exceptions_per_area))
		return;
 
-		/*
-		 * Have we completely filled the current area ?
-		 */
-		if (ps->current_committed == ps->exceptions_per_area) {
-			ps->current_committed = 0;
-			r = zero_area(ps, ps->current_area + 1);
-			if (r)
-				ps->valid = 0;
-		}
+	/*
+	 * If we completely filled the current area, then wipe the next one.
+	 */
+	if ((ps->current_committed == ps->exceptions_per_area) &&
+	     zero_disk_area(ps, ps->current_area + 1))
+		ps->valid = 0;
 
-		for (i = 0; i < ps->callback_count; i++) {
-			cb = ps->callbacks + i;
-			cb->callback(cb->context, r == 0 ? 1 : 0);
-		}
+	/*
+	 * Commit exceptions to disk.
+	 */
+	if (ps->valid && area_io(ps, WRITE))
+		ps->valid = 0;
 
-		ps->callback_count = 0;
+	/*
+	 * Advance to the next area if this one is full.
+	 */
+	if (ps->current_committed == ps->exceptions_per_area) {
+		ps->current_committed = 0;
+		ps->current_area++;
+		zero_memory_area(ps);
 	}
+
+	for (i = 0; i < ps->callback_count; i++) {
+		cb = ps->callbacks + i;
+		cb->callback(cb->context, ps->valid);
+	}
+
+	ps->callback_count = 0;
 }
 
 static void persistent_drop(struct exception_store *store)
drivers/md/dm-io.c
@@ -5,7 +5,7 @@
  * This file is released under the GPL.
  */
 
-#include "dm.h"
+#include <linux/device-mapper.h>
 
 #include <linux/bio.h>
 #include <linux/mempool.h>
drivers/md/dm-kcopyd.c
@@ -22,6 +22,7 @@
 #include <linux/vmalloc.h>
 #include <linux/workqueue.h>
 #include <linux/mutex.h>
+#include <linux/device-mapper.h>
 #include <linux/dm-kcopyd.h>
 
 #include "dm.h"
@@ -268,6 +269,17 @@ static void push(struct list_head *jobs, struct kcopyd_job *job)
 	spin_unlock_irqrestore(&kc->job_lock, flags);
 }
 
+
+static void push_head(struct list_head *jobs, struct kcopyd_job *job)
+{
+	unsigned long flags;
+	struct dm_kcopyd_client *kc = job->kc;
+
+	spin_lock_irqsave(&kc->job_lock, flags);
+	list_add(&job->list, jobs);
+	spin_unlock_irqrestore(&kc->job_lock, flags);
+}
+
 /*
  * These three functions process 1 item from the corresponding
  * job list.
@@ -398,7 +410,7 @@ static int process_jobs(struct list_head *jobs, struct dm_kcopyd_client *kc,
 			 * We couldn't service this job ATM, so
 			 * push this job back onto the list.
 			 */
-			push(jobs, job);
+			push_head(jobs, job);
 			break;
 		}
 
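Note: push() adds at the tail, so re-queueing an unserviceable job with push() let later jobs overtake it; push_head() above re-queues at the front and preserves submission order. A toy user-space queue showing the difference between the two requeue policies (illustrative code, not the kernel's list API):

#include <stdio.h>

struct job {
	int id;
	struct job *next;
};

struct queue {
	struct job *head, *tail;
};

/* push(): normal submission, add at the tail. */
static void push(struct queue *q, struct job *j)
{
	j->next = NULL;
	if (q->tail)
		q->tail->next = j;
	else
		q->head = j;
	q->tail = j;
}

/* push_head(): put a job that could not be serviced back at the
 * front, so later jobs cannot overtake it (no queue shuffle). */
static void push_head(struct queue *q, struct job *j)
{
	j->next = q->head;
	q->head = j;
	if (!q->tail)
		q->tail = j;
}

static struct job *pop(struct queue *q)
{
	struct job *j = q->head;

	if (j) {
		q->head = j->next;
		if (!q->head)
			q->tail = NULL;
	}
	return j;
}

int main(void)
{
	struct queue q = { NULL, NULL };
	struct job a = { 1 }, b = { 2 }, c = { 3 };

	push(&q, &a);
	push(&q, &b);
	push(&q, &c);

	/* job 1 cannot run yet: requeue at the head, not the tail */
	push_head(&q, pop(&q));

	while (q.head)
		printf("job %d\n", pop(&q)->id);	/* prints 1, 2, 3 */
	return 0;
}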
drivers/md/dm-linear.c
@@ -5,12 +5,12 @@
  */
 
-#include "dm.h"
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/blkdev.h>
 #include <linux/bio.h>
 #include <linux/slab.h>
+#include <linux/device-mapper.h>
 
 #define DM_MSG_PREFIX "linear"
 
drivers/md/dm-log.c
@@ -12,7 +12,7 @@
 #include <linux/dm-io.h>
 #include <linux/dm-dirty-log.h>
 
-#include "dm.h"
+#include <linux/device-mapper.h>
 
 #define DM_MSG_PREFIX "dirty region log"
 
drivers/md/dm-mpath.c
@@ -5,7 +5,8 @@
  * This file is released under the GPL.
  */
 
-#include "dm.h"
+#include <linux/device-mapper.h>
+
 #include "dm-path-selector.h"
 #include "dm-bio-list.h"
 #include "dm-bio-record.h"
drivers/md/dm-path-selector.c
@@ -9,7 +9,8 @@
  * Path selector registration.
  */
 
-#include "dm.h"
+#include <linux/device-mapper.h>
+
 #include "dm-path-selector.h"
 
 #include <linux/slab.h>
drivers/md/dm-raid1.c
(file diff suppressed because it is too large)
drivers/md/dm-region-hash.c (new file)
@@ -0,0 +1,704 @@
+/*
+ * Copyright (C) 2003 Sistina Software Limited.
+ * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
+ *
+ * This file is released under the GPL.
+ */
+
+#include <linux/dm-dirty-log.h>
+#include <linux/dm-region-hash.h>
+
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+
+#include "dm.h"
+#include "dm-bio-list.h"
+
+#define	DM_MSG_PREFIX	"region hash"
+
+/*-----------------------------------------------------------------
+ * Region hash
+ *
+ * The mirror splits itself up into discrete regions. Each
+ * region can be in one of three states: clean, dirty,
+ * nosync. There is no need to put clean regions in the hash.
+ *
+ * In addition to being present in the hash table a region _may_
+ * be present on one of three lists.
+ *
+ *   clean_regions: Regions on this list have no io pending to
+ *   them, they are in sync, we are no longer interested in them,
+ *   they are dull. dm_rh_update_states() will remove them from the
+ *   hash table.
+ *
+ *   quiesced_regions: These regions have been spun down, ready
+ *   for recovery. rh_recovery_start() will remove regions from
+ *   this list and hand them to kmirrord, which will schedule the
+ *   recovery io with kcopyd.
+ *
+ *   recovered_regions: Regions that kcopyd has successfully
+ *   recovered. dm_rh_update_states() will now schedule any delayed
+ *   io, up the recovery_count, and remove the region from the
+ *   hash.
+ *
+ * There are 2 locks:
+ *   A rw spin lock 'hash_lock' protects just the hash table,
+ *   this is never held in write mode from interrupt context,
+ *   which I believe means that we only have to disable irqs when
+ *   doing a write lock.
+ *
+ *   An ordinary spin lock 'region_lock' that protects the three
+ *   lists in the region_hash, with the 'state', 'list' and
+ *   'delayed_bios' fields of the regions. This is used from irq
+ *   context, so all other uses will have to suspend local irqs.
+ *---------------------------------------------------------------*/
+struct dm_region_hash {
+	uint32_t region_size;
+	unsigned region_shift;
+
+	/* holds persistent region state */
+	struct dm_dirty_log *log;
+
+	/* hash table */
+	rwlock_t hash_lock;
+	mempool_t *region_pool;
+	unsigned mask;
+	unsigned nr_buckets;
+	unsigned prime;
+	unsigned shift;
+	struct list_head *buckets;
+
+	unsigned max_recovery; /* Max # of regions to recover in parallel */
+
+	spinlock_t region_lock;
+	atomic_t recovery_in_flight;
+	struct semaphore recovery_count;
+	struct list_head clean_regions;
+	struct list_head quiesced_regions;
+	struct list_head recovered_regions;
+	struct list_head failed_recovered_regions;
+
+	void *context;
+	sector_t target_begin;
+
+	/* Callback function to schedule bios writes */
+	void (*dispatch_bios)(void *context, struct bio_list *bios);
+
+	/* Callback function to wakeup callers worker thread. */
+	void (*wakeup_workers)(void *context);
+
+	/* Callback function to wakeup callers recovery waiters. */
+	void (*wakeup_all_recovery_waiters)(void *context);
+};
+
+struct dm_region {
+	struct dm_region_hash *rh;	/* FIXME: can we get rid of this ? */
+	region_t key;
+	int state;
+
+	struct list_head hash_list;
+	struct list_head list;
+
+	atomic_t pending;
+	struct bio_list delayed_bios;
+};
+
+/*
+ * Conversion fns
+ */
+static region_t dm_rh_sector_to_region(struct dm_region_hash *rh, sector_t sector)
+{
+	return sector >> rh->region_shift;
+}
+
+sector_t dm_rh_region_to_sector(struct dm_region_hash *rh, region_t region)
+{
+	return region << rh->region_shift;
+}
+EXPORT_SYMBOL_GPL(dm_rh_region_to_sector);
+
+region_t dm_rh_bio_to_region(struct dm_region_hash *rh, struct bio *bio)
+{
+	return dm_rh_sector_to_region(rh, bio->bi_sector - rh->target_begin);
+}
+EXPORT_SYMBOL_GPL(dm_rh_bio_to_region);
+
+void *dm_rh_region_context(struct dm_region *reg)
+{
+	return reg->rh->context;
+}
+EXPORT_SYMBOL_GPL(dm_rh_region_context);
+
+region_t dm_rh_get_region_key(struct dm_region *reg)
+{
+	return reg->key;
+}
+EXPORT_SYMBOL_GPL(dm_rh_get_region_key);
+
+sector_t dm_rh_get_region_size(struct dm_region_hash *rh)
+{
+	return rh->region_size;
+}
+EXPORT_SYMBOL_GPL(dm_rh_get_region_size);
+
+/*
+ * FIXME: shall we pass in a structure instead of all these args to
+ * dm_region_hash_create()????
+ */
+#define RH_HASH_MULT 2654435387U
+#define RH_HASH_SHIFT 12
+
+#define MIN_REGIONS 64
+struct dm_region_hash *dm_region_hash_create(
+		void *context, void (*dispatch_bios)(void *context,
+						     struct bio_list *bios),
+		void (*wakeup_workers)(void *context),
+		void (*wakeup_all_recovery_waiters)(void *context),
+		sector_t target_begin, unsigned max_recovery,
+		struct dm_dirty_log *log, uint32_t region_size,
+		region_t nr_regions)
+{
+	struct dm_region_hash *rh;
+	unsigned nr_buckets, max_buckets;
+	size_t i;
+
+	/*
+	 * Calculate a suitable number of buckets for our hash
+	 * table.
+	 */
+	max_buckets = nr_regions >> 6;
+	for (nr_buckets = 128u; nr_buckets < max_buckets; nr_buckets <<= 1)
+		;
+	nr_buckets >>= 1;
+
+	rh = kmalloc(sizeof(*rh), GFP_KERNEL);
+	if (!rh) {
+		DMERR("unable to allocate region hash memory");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	rh->context = context;
+	rh->dispatch_bios = dispatch_bios;
+	rh->wakeup_workers = wakeup_workers;
+	rh->wakeup_all_recovery_waiters = wakeup_all_recovery_waiters;
+	rh->target_begin = target_begin;
+	rh->max_recovery = max_recovery;
+	rh->log = log;
+	rh->region_size = region_size;
+	rh->region_shift = ffs(region_size) - 1;
+	rwlock_init(&rh->hash_lock);
+	rh->mask = nr_buckets - 1;
+	rh->nr_buckets = nr_buckets;
+
+	rh->shift = RH_HASH_SHIFT;
+	rh->prime = RH_HASH_MULT;
+
+	rh->buckets = vmalloc(nr_buckets * sizeof(*rh->buckets));
+	if (!rh->buckets) {
+		DMERR("unable to allocate region hash bucket memory");
+		kfree(rh);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	for (i = 0; i < nr_buckets; i++)
+		INIT_LIST_HEAD(rh->buckets + i);
+
+	spin_lock_init(&rh->region_lock);
+	sema_init(&rh->recovery_count, 0);
+	atomic_set(&rh->recovery_in_flight, 0);
+	INIT_LIST_HEAD(&rh->clean_regions);
+	INIT_LIST_HEAD(&rh->quiesced_regions);
+	INIT_LIST_HEAD(&rh->recovered_regions);
+	INIT_LIST_HEAD(&rh->failed_recovered_regions);
+
+	rh->region_pool = mempool_create_kmalloc_pool(MIN_REGIONS,
+						      sizeof(struct dm_region));
+	if (!rh->region_pool) {
+		vfree(rh->buckets);
+		kfree(rh);
+		rh = ERR_PTR(-ENOMEM);
+	}
+
+	return rh;
+}
+EXPORT_SYMBOL_GPL(dm_region_hash_create);
+
+void dm_region_hash_destroy(struct dm_region_hash *rh)
+{
+	unsigned h;
+	struct dm_region *reg, *nreg;
+
+	BUG_ON(!list_empty(&rh->quiesced_regions));
+	for (h = 0; h < rh->nr_buckets; h++) {
+		list_for_each_entry_safe(reg, nreg, rh->buckets + h,
+					 hash_list) {
+			BUG_ON(atomic_read(&reg->pending));
+			mempool_free(reg, rh->region_pool);
+		}
+	}
+
+	if (rh->log)
+		dm_dirty_log_destroy(rh->log);
+
+	if (rh->region_pool)
+		mempool_destroy(rh->region_pool);
+
+	vfree(rh->buckets);
+	kfree(rh);
+}
+EXPORT_SYMBOL_GPL(dm_region_hash_destroy);
+
+struct dm_dirty_log *dm_rh_dirty_log(struct dm_region_hash *rh)
+{
+	return rh->log;
+}
+EXPORT_SYMBOL_GPL(dm_rh_dirty_log);
+
+static unsigned rh_hash(struct dm_region_hash *rh, region_t region)
+{
+	return (unsigned) ((region * rh->prime) >> rh->shift) & rh->mask;
+}
+
+static struct dm_region *__rh_lookup(struct dm_region_hash *rh, region_t region)
+{
+	struct dm_region *reg;
+	struct list_head *bucket = rh->buckets + rh_hash(rh, region);
+
+	list_for_each_entry(reg, bucket, hash_list)
+		if (reg->key == region)
+			return reg;
+
+	return NULL;
+}
+
+static void __rh_insert(struct dm_region_hash *rh, struct dm_region *reg)
+{
+	list_add(&reg->hash_list, rh->buckets + rh_hash(rh, reg->key));
+}
+
+static struct dm_region *__rh_alloc(struct dm_region_hash *rh, region_t region)
+{
+	struct dm_region *reg, *nreg;
+
+	nreg = mempool_alloc(rh->region_pool, GFP_ATOMIC);
+	if (unlikely(!nreg))
+		nreg = kmalloc(sizeof(*nreg), GFP_NOIO);
+
+	nreg->state = rh->log->type->in_sync(rh->log, region, 1) ?
+		      DM_RH_CLEAN : DM_RH_NOSYNC;
+	nreg->rh = rh;
+	nreg->key = region;
+	INIT_LIST_HEAD(&nreg->list);
+	atomic_set(&nreg->pending, 0);
+	bio_list_init(&nreg->delayed_bios);
+
+	write_lock_irq(&rh->hash_lock);
+	reg = __rh_lookup(rh, region);
+	if (reg)
+		/* We lost the race. */
+		mempool_free(nreg, rh->region_pool);
+	else {
+		__rh_insert(rh, nreg);
+		if (nreg->state == DM_RH_CLEAN) {
+			spin_lock(&rh->region_lock);
+			list_add(&nreg->list, &rh->clean_regions);
+			spin_unlock(&rh->region_lock);
+		}
+
+		reg = nreg;
+	}
+	write_unlock_irq(&rh->hash_lock);
+
+	return reg;
+}
+
+static struct dm_region *__rh_find(struct dm_region_hash *rh, region_t region)
+{
+	struct dm_region *reg;
+
+	reg = __rh_lookup(rh, region);
+	if (!reg) {
+		read_unlock(&rh->hash_lock);
+		reg = __rh_alloc(rh, region);
+		read_lock(&rh->hash_lock);
+	}
+
+	return reg;
+}
+
+int dm_rh_get_state(struct dm_region_hash *rh, region_t region, int may_block)
+{
+	int r;
+	struct dm_region *reg;
+
+	read_lock(&rh->hash_lock);
+	reg = __rh_lookup(rh, region);
+	read_unlock(&rh->hash_lock);
+
+	if (reg)
+		return reg->state;
+
+	/*
+	 * The region wasn't in the hash, so we fall back to the
+	 * dirty log.
+	 */
+	r = rh->log->type->in_sync(rh->log, region, may_block);
+
+	/*
+	 * Any error from the dirty log (eg. -EWOULDBLOCK) gets
+	 * taken as a DM_RH_NOSYNC
+	 */
+	return r == 1 ? DM_RH_CLEAN : DM_RH_NOSYNC;
+}
+EXPORT_SYMBOL_GPL(dm_rh_get_state);
+
+static void complete_resync_work(struct dm_region *reg, int success)
+{
+	struct dm_region_hash *rh = reg->rh;
+
+	rh->log->type->set_region_sync(rh->log, reg->key, success);
+
+	/*
+	 * Dispatch the bios before we call 'wake_up_all'.
+	 * This is important because if we are suspending,
+	 * we want to know that recovery is complete and
+	 * the work queue is flushed. If we wake_up_all
+	 * before we dispatch_bios (queue bios and call wake()),
+	 * then we risk suspending before the work queue
+	 * has been properly flushed.
+	 */
+	rh->dispatch_bios(rh->context, &reg->delayed_bios);
+	if (atomic_dec_and_test(&rh->recovery_in_flight))
+		rh->wakeup_all_recovery_waiters(rh->context);
+	up(&rh->recovery_count);
+}
+
+/* dm_rh_mark_nosync
+ * @ms
+ * @bio
+ * @done
+ * @error
+ *
+ * The bio was written on some mirror(s) but failed on other mirror(s).
+ * We can successfully endio the bio but should avoid the region being
+ * marked clean by setting the state DM_RH_NOSYNC.
+ *
+ * This function is _not_ safe in interrupt context!
+ */
+void dm_rh_mark_nosync(struct dm_region_hash *rh,
+		       struct bio *bio, unsigned done, int error)
+{
+	unsigned long flags;
+	struct dm_dirty_log *log = rh->log;
+	struct dm_region *reg;
+	region_t region = dm_rh_bio_to_region(rh, bio);
+	int recovering = 0;
+
+	/* We must inform the log that the sync count has changed. */
+	log->type->set_region_sync(log, region, 0);
+
+	read_lock(&rh->hash_lock);
+	reg = __rh_find(rh, region);
+	read_unlock(&rh->hash_lock);
+
+	/* region hash entry should exist because write was in-flight */
+	BUG_ON(!reg);
+	BUG_ON(!list_empty(&reg->list));
+
+	spin_lock_irqsave(&rh->region_lock, flags);
+	/*
+	 * Possible cases:
+	 *   1) DM_RH_DIRTY
+	 *   2) DM_RH_NOSYNC: was dirty, other preceeding writes failed
+	 *   3) DM_RH_RECOVERING: flushing pending writes
+	 * Either case, the region should have not been connected to list.
+	 */
+	recovering = (reg->state == DM_RH_RECOVERING);
+	reg->state = DM_RH_NOSYNC;
+	BUG_ON(!list_empty(&reg->list));
+	spin_unlock_irqrestore(&rh->region_lock, flags);
+
+	bio_endio(bio, error);
+	if (recovering)
+		complete_resync_work(reg, 0);
+}
+EXPORT_SYMBOL_GPL(dm_rh_mark_nosync);
+
+void dm_rh_update_states(struct dm_region_hash *rh, int errors_handled)
+{
+	struct dm_region *reg, *next;
+
+	LIST_HEAD(clean);
+	LIST_HEAD(recovered);
+	LIST_HEAD(failed_recovered);
+
+	/*
+	 * Quickly grab the lists.
+	 */
+	write_lock_irq(&rh->hash_lock);
+	spin_lock(&rh->region_lock);
+	if (!list_empty(&rh->clean_regions)) {
+		list_splice_init(&rh->clean_regions, &clean);
+
+		list_for_each_entry(reg, &clean, list)
+			list_del(&reg->hash_list);
+	}
+
+	if (!list_empty(&rh->recovered_regions)) {
+		list_splice_init(&rh->recovered_regions, &recovered);
+
+		list_for_each_entry(reg, &recovered, list)
+			list_del(&reg->hash_list);
+	}
+
+	if (!list_empty(&rh->failed_recovered_regions)) {
+		list_splice_init(&rh->failed_recovered_regions,
+				 &failed_recovered);
+
+		list_for_each_entry(reg, &failed_recovered, list)
+			list_del(&reg->hash_list);
+	}
+
+	spin_unlock(&rh->region_lock);
+	write_unlock_irq(&rh->hash_lock);
+
+	/*
+	 * All the regions on the recovered and clean lists have
+	 * now been pulled out of the system, so no need to do
+	 * any more locking.
+	 */
+	list_for_each_entry_safe(reg, next, &recovered, list) {
+		rh->log->type->clear_region(rh->log, reg->key);
+		complete_resync_work(reg, 1);
+		mempool_free(reg, rh->region_pool);
+	}
+
+	list_for_each_entry_safe(reg, next, &failed_recovered, list) {
+		complete_resync_work(reg, errors_handled ? 0 : 1);
+		mempool_free(reg, rh->region_pool);
+	}
+
+	list_for_each_entry_safe(reg, next, &clean, list) {
+		rh->log->type->clear_region(rh->log, reg->key);
+		mempool_free(reg, rh->region_pool);
+	}
+
+	rh->log->type->flush(rh->log);
+}
+EXPORT_SYMBOL_GPL(dm_rh_update_states);
+
+static void rh_inc(struct dm_region_hash *rh, region_t region)
+{
+	struct dm_region *reg;
+
+	read_lock(&rh->hash_lock);
+	reg = __rh_find(rh, region);
+
+	spin_lock_irq(&rh->region_lock);
+	atomic_inc(&reg->pending);
+
+	if (reg->state == DM_RH_CLEAN) {
+		reg->state = DM_RH_DIRTY;
+		list_del_init(&reg->list);	/* take off the clean list */
+		spin_unlock_irq(&rh->region_lock);
+
+		rh->log->type->mark_region(rh->log, reg->key);
+	} else
+		spin_unlock_irq(&rh->region_lock);
+
+
+	read_unlock(&rh->hash_lock);
+}
+
+void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios)
+{
+	struct bio *bio;
+
+	for (bio = bios->head; bio; bio = bio->bi_next)
+		rh_inc(rh, dm_rh_bio_to_region(rh, bio));
+}
+EXPORT_SYMBOL_GPL(dm_rh_inc_pending);
+
+void dm_rh_dec(struct dm_region_hash *rh, region_t region)
+{
+	unsigned long flags;
+	struct dm_region *reg;
+	int should_wake = 0;
+
+	read_lock(&rh->hash_lock);
+	reg = __rh_lookup(rh, region);
+	read_unlock(&rh->hash_lock);
+
+	spin_lock_irqsave(&rh->region_lock, flags);
+	if (atomic_dec_and_test(&reg->pending)) {
+		/*
+		 * There is no pending I/O for this region.
+		 * We can move the region to corresponding list for next action.
+		 * At this point, the region is not yet connected to any list.
+		 *
+		 * If the state is DM_RH_NOSYNC, the region should be kept off
+		 * from clean list.
+		 * The hash entry for DM_RH_NOSYNC will remain in memory
+		 * until the region is recovered or the map is reloaded.
+		 */
+
+		/* do nothing for DM_RH_NOSYNC */
+		if (reg->state == DM_RH_RECOVERING) {
+			list_add_tail(&reg->list, &rh->quiesced_regions);
+		} else if (reg->state == DM_RH_DIRTY) {
+			reg->state = DM_RH_CLEAN;
+			list_add(&reg->list, &rh->clean_regions);
+		}
+		should_wake = 1;
+	}
+	spin_unlock_irqrestore(&rh->region_lock, flags);
+
+	if (should_wake)
+		rh->wakeup_workers(rh->context);
+}
+EXPORT_SYMBOL_GPL(dm_rh_dec);
+
+/*
+ * Starts quiescing a region in preparation for recovery.
+ */
+static int __rh_recovery_prepare(struct dm_region_hash *rh)
+{
+	int r;
+	region_t region;
+	struct dm_region *reg;
+
+	/*
+	 * Ask the dirty log what's next.
+	 */
+	r = rh->log->type->get_resync_work(rh->log, &region);
+	if (r <= 0)
+		return r;
+
+	/*
+	 * Get this region, and start it quiescing by setting the
+	 * recovering flag.
+	 */
+	read_lock(&rh->hash_lock);
+	reg = __rh_find(rh, region);
+	read_unlock(&rh->hash_lock);
+
+	spin_lock_irq(&rh->region_lock);
+	reg->state = DM_RH_RECOVERING;
+
+	/* Already quiesced ? */
+	if (atomic_read(&reg->pending))
+		list_del_init(&reg->list);
+	else
+		list_move(&reg->list, &rh->quiesced_regions);
+
+	spin_unlock_irq(&rh->region_lock);
+
+	return 1;
+}
+
+void dm_rh_recovery_prepare(struct dm_region_hash *rh)
+{
+	/* Extra reference to avoid race with dm_rh_stop_recovery */
+	atomic_inc(&rh->recovery_in_flight);
+
+	while (!down_trylock(&rh->recovery_count)) {
+		atomic_inc(&rh->recovery_in_flight);
+		if (__rh_recovery_prepare(rh) <= 0) {
+			atomic_dec(&rh->recovery_in_flight);
+			up(&rh->recovery_count);
+			break;
+		}
+	}
+
+	/* Drop the extra reference */
+	if (atomic_dec_and_test(&rh->recovery_in_flight))
+		rh->wakeup_all_recovery_waiters(rh->context);
+}
+EXPORT_SYMBOL_GPL(dm_rh_recovery_prepare);
+
+/*
+ * Returns any quiesced regions.
+ */
+struct dm_region *dm_rh_recovery_start(struct dm_region_hash *rh)
+{
+	struct dm_region *reg = NULL;
+
+	spin_lock_irq(&rh->region_lock);
+	if (!list_empty(&rh->quiesced_regions)) {
+		reg = list_entry(rh->quiesced_regions.next,
+				 struct dm_region, list);
+		list_del_init(&reg->list);  /* remove from the quiesced list */
+	}
+	spin_unlock_irq(&rh->region_lock);
+
+	return reg;
+}
+EXPORT_SYMBOL_GPL(dm_rh_recovery_start);
+
+void dm_rh_recovery_end(struct dm_region *reg, int success)
+{
+	struct dm_region_hash *rh = reg->rh;
+
+	spin_lock_irq(&rh->region_lock);
+	if (success)
+		list_add(&reg->list, &reg->rh->recovered_regions);
+	else {
+		reg->state = DM_RH_NOSYNC;
+		list_add(&reg->list, &reg->rh->failed_recovered_regions);
+	}
+	spin_unlock_irq(&rh->region_lock);
+
+	rh->wakeup_workers(rh->context);
+}
+EXPORT_SYMBOL_GPL(dm_rh_recovery_end);
+
+/* Return recovery in flight count. */
+int dm_rh_recovery_in_flight(struct dm_region_hash *rh)
+{
+	return atomic_read(&rh->recovery_in_flight);
+}
+EXPORT_SYMBOL_GPL(dm_rh_recovery_in_flight);
+
+int dm_rh_flush(struct dm_region_hash *rh)
+{
+	return rh->log->type->flush(rh->log);
+}
+EXPORT_SYMBOL_GPL(dm_rh_flush);
+
+void dm_rh_delay(struct dm_region_hash *rh, struct bio *bio)
+{
+	struct dm_region *reg;
+
+	read_lock(&rh->hash_lock);
+	reg = __rh_find(rh, dm_rh_bio_to_region(rh, bio));
+	bio_list_add(&reg->delayed_bios, bio);
+	read_unlock(&rh->hash_lock);
+}
+EXPORT_SYMBOL_GPL(dm_rh_delay);
+
+void dm_rh_stop_recovery(struct dm_region_hash *rh)
+{
+	int i;
+
+	/* wait for any recovering regions */
+	for (i = 0; i < rh->max_recovery; i++)
+		down(&rh->recovery_count);
+}
+EXPORT_SYMBOL_GPL(dm_rh_stop_recovery);
+
+void dm_rh_start_recovery(struct dm_region_hash *rh)
+{
+	int i;
+
+	for (i = 0; i < rh->max_recovery; i++)
+		up(&rh->recovery_count);
+
+	rh->wakeup_workers(rh->context);
+}
+EXPORT_SYMBOL_GPL(dm_rh_start_recovery);
+
+MODULE_DESCRIPTION(DM_NAME " region hash");
+MODULE_AUTHOR("Joe Thornber/Heinz Mauelshagen <dm-devel@redhat.com>");
+MODULE_LICENSE("GPL");
drivers/md/dm-round-robin.c
@@ -9,7 +9,8 @@
  * Round-robin path selector.
  */
 
-#include "dm.h"
+#include <linux/device-mapper.h>
+
 #include "dm-path-selector.h"
 
 #include <linux/slab.h>
drivers/md/dm-snap.c
@@ -600,7 +600,6 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
 	s->valid = 1;
 	s->active = 0;
-	s->last_percent = 0;
 	init_rwsem(&s->lock);
 	spin_lock_init(&s->pe_lock);
 	s->ti = ti;
@@ -824,8 +823,10 @@ static struct bio *put_pending_exception(struct dm_snap_pending_exception *pe)
 	 * the bios for the original write to the origin.
 	 */
 	if (primary_pe &&
-	    atomic_dec_and_test(&primary_pe->ref_count))
+	    atomic_dec_and_test(&primary_pe->ref_count)) {
 		origin_bios = bio_list_get(&primary_pe->origin_bios);
+		free_pending_exception(primary_pe);
+	}
 
 	/*
 	 * Free the pe if it's not linked to an origin write or if
@@ -834,12 +835,6 @@ static struct bio *put_pending_exception(struct dm_snap_pending_exception *pe)
 	if (!primary_pe || primary_pe != pe)
 		free_pending_exception(pe);
 
-	/*
-	 * Free the primary pe if nothing references it.
-	 */
-	if (primary_pe && !atomic_read(&primary_pe->ref_count))
-		free_pending_exception(primary_pe);
-
 	return origin_bios;
 }
 
drivers/md/dm-snap.h
@@ -9,7 +9,7 @@
 #ifndef DM_SNAPSHOT_H
 #define DM_SNAPSHOT_H
 
-#include "dm.h"
+#include <linux/device-mapper.h>
 #include "dm-bio-list.h"
 #include <linux/blkdev.h>
 #include <linux/workqueue.h>
@@ -158,9 +158,6 @@ struct dm_snapshot {
 	/* Used for display of table */
 	char type;
 
-	/* The last percentage we notified */
-	int last_percent;
-
 	mempool_t *pending_pool;
 
 	struct exception_table pending;
drivers/md/dm-stripe.c
@@ -4,7 +4,7 @@
  * This file is released under the GPL.
  */
 
-#include "dm.h"
+#include <linux/device-mapper.h>
 
 #include <linux/module.h>
 #include <linux/init.h>
@@ -60,8 +60,8 @@ static inline struct stripe_c *alloc_context(unsigned int stripes)
 {
 	size_t len;
 
-	if (array_too_big(sizeof(struct stripe_c), sizeof(struct stripe),
-			  stripes))
+	if (dm_array_too_big(sizeof(struct stripe_c), sizeof(struct stripe),
+			     stripes))
 		return NULL;
 
 	len = sizeof(struct stripe_c) + (sizeof(struct stripe) * stripes);
drivers/md/dm-zero.c
@@ -4,7 +4,7 @@
  * This file is released under the GPL.
  */
 
-#include "dm.h"
+#include <linux/device-mapper.h>
 
 #include <linux/module.h>
 #include <linux/init.h>
drivers/md/dm.c
@@ -76,7 +76,6 @@ union map_info *dm_get_mapinfo(struct bio *bio)
  */
 struct dm_wq_req {
 	enum {
-		DM_WQ_FLUSH_ALL,
 		DM_WQ_FLUSH_DEFERRED,
 	} type;
 	struct work_struct work;
@@ -151,40 +150,40 @@ static struct kmem_cache *_tio_cache;
 
 static int __init local_init(void)
 {
-	int r;
+	int r = -ENOMEM;
 
 	/* allocate a slab for the dm_ios */
 	_io_cache = KMEM_CACHE(dm_io, 0);
 	if (!_io_cache)
-		return -ENOMEM;
+		return r;
 
 	/* allocate a slab for the target ios */
 	_tio_cache = KMEM_CACHE(dm_target_io, 0);
-	if (!_tio_cache) {
-		kmem_cache_destroy(_io_cache);
-		return -ENOMEM;
-	}
+	if (!_tio_cache)
+		goto out_free_io_cache;
 
 	r = dm_uevent_init();
-	if (r) {
-		kmem_cache_destroy(_tio_cache);
-		kmem_cache_destroy(_io_cache);
-		return r;
-	}
+	if (r)
+		goto out_free_tio_cache;
 
 	_major = major;
 	r = register_blkdev(_major, _name);
-	if (r < 0) {
-		kmem_cache_destroy(_tio_cache);
-		kmem_cache_destroy(_io_cache);
-		dm_uevent_exit();
-		return r;
-	}
+	if (r < 0)
+		goto out_uevent_exit;
 
 	if (!_major)
 		_major = r;
 
 	return 0;
+
+out_uevent_exit:
+	dm_uevent_exit();
+out_free_tio_cache:
+	kmem_cache_destroy(_tio_cache);
+out_free_io_cache:
+	kmem_cache_destroy(_io_cache);
+
+	return r;
 }
 
 static void local_exit(void)
@@ -669,6 +668,7 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector,
 	clone->bi_size = to_bytes(len);
 	clone->bi_io_vec->bv_offset = offset;
 	clone->bi_io_vec->bv_len = clone->bi_size;
+	clone->bi_flags |= 1 << BIO_CLONED;
 
 	return clone;
 }
@@ -1394,9 +1394,6 @@ static void dm_wq_work(struct work_struct *work)
 
 	down_write(&md->io_lock);
 	switch (req->type) {
-	case DM_WQ_FLUSH_ALL:
-		__merge_pushback_list(md);
-		/* pass through */
 	case DM_WQ_FLUSH_DEFERRED:
 		__flush_deferred_io(md);
 		break;
@@ -1526,7 +1523,7 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 		if (!md->suspended_bdev) {
 			DMWARN("bdget failed in dm_suspend");
 			r = -ENOMEM;
-			goto flush_and_out;
+			goto out;
 		}
 
 		/*
@@ -1577,14 +1574,6 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 
 	set_bit(DMF_SUSPENDED, &md->flags);
 
-flush_and_out:
-	if (r && noflush)
-		/*
-		 * Because there may be already I/Os in the pushback list,
-		 * flush them before return.
-		 */
-		dm_queue_flush(md, DM_WQ_FLUSH_ALL, NULL);
-
 out:
 	if (r && md->suspended_bdev) {
 		bdput(md->suspended_bdev);
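Note: the local_init() rewrite above is the kernel's usual goto-unwind error-handling shape: each failure jumps to a label that frees exactly what was already acquired, in reverse order, instead of duplicating the cleanup calls on every path. A generic user-space sketch of the pattern (stand-in resources, not dm code):

#include <stdlib.h>

/* Acquire three resources; on failure, unwind only the ones held. */
static int init_three(void **a, void **b, void **c)
{
	int r = -1;	/* assume failure until the end, like r = -ENOMEM */

	*a = malloc(16);
	if (!*a)
		return r;

	*b = malloc(32);
	if (!*b)
		goto out_free_a;

	*c = malloc(64);
	if (!*c)
		goto out_free_b;

	return 0;	/* success: all three resources are held */

out_free_b:
	free(*b);
out_free_a:
	free(*a);
	return r;
}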
drivers/md/dm.h
@@ -62,15 +62,6 @@ void dm_put_target_type(struct target_type *t);
 int dm_target_iterate(void (*iter_func)(struct target_type *tt,
 					void *param), void *param);
 
-/*-----------------------------------------------------------------
- * Useful inlines.
- *---------------------------------------------------------------*/
-static inline int array_too_big(unsigned long fixed, unsigned long obj,
-				unsigned long num)
-{
-	return (num > (ULONG_MAX - fixed) / obj);
-}
-
 int dm_split_args(int *argc, char ***argvp, char *input);
 
 /*
include/linux/device-mapper.h
@@ -354,6 +354,9 @@ void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size);
  */
 #define dm_round_up(n, sz) (dm_div_up((n), (sz)) * (sz))
 
+#define dm_array_too_big(fixed, obj, num) \
+	((num) > (UINT_MAX - (fixed)) / (obj))
+
 static inline sector_t to_sector(unsigned long n)
 {
 	return (n >> SECTOR_SHIFT);
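Note: dm_array_too_big() rejects an element count before an allocation size of the form fixed + num * obj is computed; rearranging fixed + num * obj <= UINT_MAX gives num <= (UINT_MAX - fixed) / obj in integer arithmetic, so the check itself cannot overflow. A sketch of the intended call pattern, modelled on the dm-stripe alloc_context() change above (the struct definitions are stand-ins):

#include <limits.h>
#include <stdlib.h>

#define dm_array_too_big(fixed, obj, num) \
	((num) > (UINT_MAX - (fixed)) / (obj))

/* Stand-ins for the dm-stripe structures. */
struct stripe { unsigned long long physical_start; };
struct stripe_c {
	unsigned stripes;
	struct stripe stripe[];	/* one entry per stripe */
};

static struct stripe_c *alloc_context(unsigned stripes)
{
	size_t len;

	/* fixed + num * obj must not wrap before we allocate it */
	if (dm_array_too_big(sizeof(struct stripe_c), sizeof(struct stripe),
			     stripes))
		return NULL;

	len = sizeof(struct stripe_c) + sizeof(struct stripe) * stripes;
	return malloc(len);
}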
include/linux/dm-region-hash.h (new file)
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2003 Sistina Software Limited.
+ * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
+ *
+ * Device-Mapper dirty region hash interface.
+ *
+ * This file is released under the GPL.
+ */
+
+#ifndef DM_REGION_HASH_H
+#define DM_REGION_HASH_H
+
+#include <linux/dm-dirty-log.h>
+
+/*-----------------------------------------------------------------
+ * Region hash
+ *----------------------------------------------------------------*/
+struct dm_region_hash;
+struct dm_region;
+
+/*
+ * States a region can have.
+ */
+enum dm_rh_region_states {
+	DM_RH_CLEAN	 = 0x01,	/* No writes in flight. */
+	DM_RH_DIRTY	 = 0x02,	/* Writes in flight. */
+	DM_RH_NOSYNC	 = 0x04,	/* Out of sync. */
+	DM_RH_RECOVERING = 0x08,	/* Under resynchronization. */
+};
+
+/*
+ * Region hash create/destroy.
+ */
+struct bio_list;
+struct dm_region_hash *dm_region_hash_create(
+		void *context, void (*dispatch_bios)(void *context,
+						     struct bio_list *bios),
+		void (*wakeup_workers)(void *context),
+		void (*wakeup_all_recovery_waiters)(void *context),
+		sector_t target_begin, unsigned max_recovery,
+		struct dm_dirty_log *log, uint32_t region_size,
+		region_t nr_regions);
+void dm_region_hash_destroy(struct dm_region_hash *rh);
+
+struct dm_dirty_log *dm_rh_dirty_log(struct dm_region_hash *rh);
+
+/*
+ * Conversion functions.
+ */
+region_t dm_rh_bio_to_region(struct dm_region_hash *rh, struct bio *bio);
+sector_t dm_rh_region_to_sector(struct dm_region_hash *rh, region_t region);
+void *dm_rh_region_context(struct dm_region *reg);
+
+/*
+ * Get region size and key (ie. number of the region).
+ */
+sector_t dm_rh_get_region_size(struct dm_region_hash *rh);
+region_t dm_rh_get_region_key(struct dm_region *reg);
+
+/*
+ * Get/set/update region state (and dirty log).
+ *
+ */
+int dm_rh_get_state(struct dm_region_hash *rh, region_t region, int may_block);
+void dm_rh_set_state(struct dm_region_hash *rh, region_t region,
+		     enum dm_rh_region_states state, int may_block);
+
+/* Non-zero errors_handled leaves the state of the region NOSYNC */
+void dm_rh_update_states(struct dm_region_hash *rh, int errors_handled);
+
+/* Flush the region hash and dirty log. */
+int dm_rh_flush(struct dm_region_hash *rh);
+
+/* Inc/dec pending count on regions. */
+void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios);
+void dm_rh_dec(struct dm_region_hash *rh, region_t region);
+
+/* Delay bios on regions. */
+void dm_rh_delay(struct dm_region_hash *rh, struct bio *bio);
+
+void dm_rh_mark_nosync(struct dm_region_hash *rh,
+		       struct bio *bio, unsigned done, int error);
+
+/*
+ * Region recovery control.
+ */
+
+/* Prepare some regions for recovery by starting to quiesce them. */
+void dm_rh_recovery_prepare(struct dm_region_hash *rh);
+
+/* Try fetching a quiesced region for recovery. */
+struct dm_region *dm_rh_recovery_start(struct dm_region_hash *rh);
+
+/* Report recovery end on a region. */
+void dm_rh_recovery_end(struct dm_region *reg, int error);
+
+/* Returns number of regions with recovery work outstanding. */
+int dm_rh_recovery_in_flight(struct dm_region_hash *rh);
+
+/* Start/stop recovery. */
+void dm_rh_start_recovery(struct dm_region_hash *rh);
+void dm_rh_stop_recovery(struct dm_region_hash *rh);
+
+#endif	/* DM_REGION_HASH_H */
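Note: a client of this interface supplies three callbacks and then drives region state through the inc/dec pending calls. A hypothetical sketch of how a mirror-style target might wire this up (mirror_set, the ms_* callbacks and MS_MAX_RECOVERY are stand-ins, not dm-raid1.c code):

#include <linux/device-mapper.h>
#include <linux/dm-dirty-log.h>
#include <linux/dm-region-hash.h>
#include <linux/err.h>

#define MS_MAX_RECOVERY 1	/* regions recovered in parallel (assumed) */

struct mirror_set {
	struct dm_region_hash *rh;
	/* worker thread, mirror legs, bio lists elided */
};

/* Callbacks the region hash uses to reach back into the target;
 * a real target would queue the bios and kick its worker here. */
static void ms_dispatch_bios(void *context, struct bio_list *bios) { }
static void ms_wakeup_workers(void *context) { }
static void ms_wakeup_all_recovery_waiters(void *context) { }

static int ms_create_region_hash(struct mirror_set *ms, struct dm_target *ti,
				 struct dm_dirty_log *log,
				 uint32_t region_size, region_t nr_regions)
{
	struct dm_region_hash *rh;

	rh = dm_region_hash_create(ms, ms_dispatch_bios, ms_wakeup_workers,
				   ms_wakeup_all_recovery_waiters, ti->begin,
				   MS_MAX_RECOVERY, log, region_size,
				   nr_regions);
	if (IS_ERR(rh))
		return PTR_ERR(rh);

	ms->rh = rh;
	return 0;
}

On the write path such a target would call dm_rh_inc_pending() on a batch of bios before issuing them and dm_rh_dec() as each region's writes drain, which is what drives the clean/dirty transitions implemented in dm-region-hash.c above.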