md: Support write-intent bitmaps with externally managed metadata.

In this case, the metadata must not be in the same
sector as the bitmap.
md will not read/write any bitmap metadata.  Config must be
done via sysfs and when a recovery makes the array non-degraded
again, writing 'true' to 'bitmap/can_clear' will allow bits in
the bitmap to be cleared again.

Signed-off-by: NeilBrown <neilb@suse.de>
This commit is contained in:
NeilBrown 2009-12-14 12:49:56 +11:00
parent 624ce4f565
commit ece5cff0da
4 changed files with 137 additions and 33 deletions

View File

@ -322,6 +322,22 @@ All md devices contain:
'backlog' sets a limit on the number of concurrent background
writes. If there are more than this, new writes will be
synchronous.
bitmap/metadata
This can be either 'internal' or 'external'.
'internal' is the default and means the metadata for the bitmap
is stored in the first 256 bytes of the allocated space and is
managed by the md module.
'external' means that bitmap metadata is managed externally to
the kernel (i.e. by some userspace program).
bitmap/can_clear
This is either 'true' or 'false'. If 'true', then bits in the
bitmap will be cleared when the corresponding blocks are thought
to be in-sync. If 'false', bits will never be cleared.
This is automatically set to 'false' if a write happens on a
degraded array, or if the array becomes degraded during a write.
When metadata is managed externally, it should be set to true
once the array becomes non-degraded, and this fact has been
recorded in the metadata.

View File

@ -497,6 +497,8 @@ void bitmap_update_sb(struct bitmap *bitmap)
if (!bitmap || !bitmap->mddev) /* no bitmap for this array */
return;
if (bitmap->mddev->bitmap_info.external)
return;
spin_lock_irqsave(&bitmap->lock, flags);
if (!bitmap->sb_page) { /* no superblock */
spin_unlock_irqrestore(&bitmap->lock, flags);
@ -676,16 +678,26 @@ static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits,
* general bitmap file operations
*/
/*
* on-disk bitmap:
*
* Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
* file a page at a time. There's a superblock at the start of the file.
*/
/* calculate the index of the page that contains this bit */
static inline unsigned long file_page_index(unsigned long chunk)
static inline unsigned long file_page_index(struct bitmap *bitmap, unsigned long chunk)
{
return CHUNK_BIT_OFFSET(chunk) >> PAGE_BIT_SHIFT;
if (!bitmap->mddev->bitmap_info.external)
chunk += sizeof(bitmap_super_t) << 3;
return chunk >> PAGE_BIT_SHIFT;
}
/* calculate the (bit) offset of this bit within a page */
static inline unsigned long file_page_offset(unsigned long chunk)
static inline unsigned long file_page_offset(struct bitmap *bitmap, unsigned long chunk)
{
return CHUNK_BIT_OFFSET(chunk) & (PAGE_BITS - 1);
if (!bitmap->mddev->bitmap_info.external)
chunk += sizeof(bitmap_super_t) << 3;
return chunk & (PAGE_BITS - 1);
}
/*
@ -698,8 +710,9 @@ static inline unsigned long file_page_offset(unsigned long chunk)
static inline struct page *filemap_get_page(struct bitmap *bitmap,
unsigned long chunk)
{
if (file_page_index(chunk) >= bitmap->file_pages) return NULL;
return bitmap->filemap[file_page_index(chunk) - file_page_index(0)];
if (file_page_index(bitmap, chunk) >= bitmap->file_pages) return NULL;
return bitmap->filemap[file_page_index(bitmap, chunk)
- file_page_index(bitmap, 0)];
}
@ -722,7 +735,7 @@ static void bitmap_file_unmap(struct bitmap *bitmap)
spin_unlock_irqrestore(&bitmap->lock, flags);
while (pages--)
if (map[pages]->index != 0) /* 0 is sb_page, release it below */
if (map[pages] != sb_page) /* 0 is sb_page, release it below */
free_buffers(map[pages]);
kfree(map);
kfree(attr);
@ -833,7 +846,7 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
page = filemap_get_page(bitmap, chunk);
if (!page) return;
bit = file_page_offset(chunk);
bit = file_page_offset(bitmap, chunk);
/* set the bit */
kaddr = kmap_atomic(page, KM_USER0);
@ -931,14 +944,17 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
"recovery\n", bmname(bitmap));
bytes = (chunks + 7) / 8;
if (!bitmap->mddev->bitmap_info.external)
bytes += sizeof(bitmap_super_t);
num_pages = (bytes + sizeof(bitmap_super_t) + PAGE_SIZE - 1) / PAGE_SIZE;
if (file && i_size_read(file->f_mapping->host) < bytes + sizeof(bitmap_super_t)) {
num_pages = (bytes + PAGE_SIZE - 1) / PAGE_SIZE;
if (file && i_size_read(file->f_mapping->host) < bytes) {
printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n",
bmname(bitmap),
(unsigned long) i_size_read(file->f_mapping->host),
bytes + sizeof(bitmap_super_t));
bytes);
goto err;
}
@ -959,17 +975,16 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
for (i = 0; i < chunks; i++) {
int b;
index = file_page_index(i);
bit = file_page_offset(i);
index = file_page_index(bitmap, i);
bit = file_page_offset(bitmap, i);
if (index != oldindex) { /* this is a new page, read it in */
int count;
/* unmap the old page, we're done with it */
if (index == num_pages-1)
count = bytes + sizeof(bitmap_super_t)
- index * PAGE_SIZE;
count = bytes - index * PAGE_SIZE;
else
count = PAGE_SIZE;
if (index == 0) {
if (index == 0 && bitmap->sb_page) {
/*
* if we're here then the superblock page
* contains some bits (PAGE_SIZE != sizeof sb)
@ -1164,7 +1179,8 @@ void bitmap_daemon_work(mddev_t *mddev)
/* We are possibly going to clear some bits, so make
* sure that events_cleared is up-to-date.
*/
if (bitmap->need_sync) {
if (bitmap->need_sync &&
bitmap->mddev->bitmap_info.external == 0) {
bitmap_super_t *sb;
bitmap->need_sync = 0;
sb = kmap_atomic(bitmap->sb_page, KM_USER0);
@ -1174,6 +1190,7 @@ void bitmap_daemon_work(mddev_t *mddev)
write_page(bitmap, bitmap->sb_page, 1);
}
spin_lock_irqsave(&bitmap->lock, flags);
if (!bitmap->need_sync)
clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
}
bmc = bitmap_get_counter(bitmap,
@ -1189,7 +1206,7 @@ void bitmap_daemon_work(mddev_t *mddev)
if (*bmc == 2) {
*bmc=1; /* maybe clear the bit next time */
set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
} else if (*bmc == 1) {
} else if (*bmc == 1 && !bitmap->need_sync) {
/* we can clear the bit */
*bmc = 0;
bitmap_count_page(bitmap,
@ -1199,9 +1216,11 @@ void bitmap_daemon_work(mddev_t *mddev)
/* clear the bit */
paddr = kmap_atomic(page, KM_USER0);
if (bitmap->flags & BITMAP_HOSTENDIAN)
clear_bit(file_page_offset(j), paddr);
clear_bit(file_page_offset(bitmap, j),
paddr);
else
ext2_clear_bit(file_page_offset(j), paddr);
ext2_clear_bit(file_page_offset(bitmap, j),
paddr);
kunmap_atomic(paddr, KM_USER0);
}
} else
@ -1356,6 +1375,7 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
bitmap->events_cleared < bitmap->mddev->events) {
bitmap->events_cleared = bitmap->mddev->events;
bitmap->need_sync = 1;
sysfs_notify_dirent(bitmap->sysfs_can_clear);
}
if (!success && ! (*bmc & NEEDED_MASK))
@ -1613,6 +1633,9 @@ void bitmap_destroy(mddev_t *mddev)
if (mddev->thread)
mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
if (bitmap->sysfs_can_clear)
sysfs_put(bitmap->sysfs_can_clear);
bitmap_free(bitmap);
}
@ -1629,6 +1652,7 @@ int bitmap_create(mddev_t *mddev)
struct file *file = mddev->bitmap_info.file;
int err;
sector_t start;
struct sysfs_dirent *bm;
BUILD_BUG_ON(sizeof(bitmap_super_t) != 256);
@ -1648,6 +1672,13 @@ int bitmap_create(mddev_t *mddev)
bitmap->mddev = mddev;
bm = sysfs_get_dirent(mddev->kobj.sd, "bitmap");
if (bm) {
bitmap->sysfs_can_clear = sysfs_get_dirent(bm, "can_clear");
sysfs_put(bm);
} else
bitmap->sysfs_can_clear = NULL;
bitmap->file = file;
if (file) {
get_file(file);
@ -1658,7 +1689,16 @@ int bitmap_create(mddev_t *mddev)
vfs_fsync(file, file->f_dentry, 1);
}
/* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */
if (!mddev->bitmap_info.external)
err = bitmap_read_sb(bitmap);
else {
err = 0;
if (mddev->bitmap_info.chunksize == 0 ||
mddev->bitmap_info.daemon_sleep == 0)
/* chunksize and time_base need to be
* set first. */
err = -EINVAL;
}
if (err)
goto error;
@ -1777,7 +1817,8 @@ location_store(mddev_t *mddev, const char *buf, size_t len)
return rv;
if (offset == 0)
return -EINVAL;
if (mddev->major_version == 0 &&
if (mddev->bitmap_info.external == 0 &&
mddev->major_version == 0 &&
offset != mddev->bitmap_info.default_offset)
return -EINVAL;
mddev->bitmap_info.offset = offset;
@ -1906,11 +1947,66 @@ chunksize_store(mddev_t *mddev, const char *buf, size_t len)
/*
 * sysfs attribute "chunksize": readable by everyone, writable by the
 * owner; reads go through chunksize_show() and writes through
 * chunksize_store().
 */
static struct md_sysfs_entry bitmap_chunksize =
__ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store);
/*
 * sysfs show handler for the "metadata" attribute.
 *
 * Writes "external\n" into @page when mddev->bitmap_info.external is
 * non-zero and "internal\n" otherwise, and returns the number of
 * characters written (the sprintf() result).
 */
static ssize_t metadata_show(mddev_t *mddev, char *page)
{
	const char *kind = "internal";

	if (mddev->bitmap_info.external)
		kind = "external";

	return sprintf(page, "%s\n", kind);
}
/*
 * sysfs store handler for the "metadata" attribute.
 *
 * Chooses between "internal" and "external" bitmap metadata by setting
 * mddev->bitmap_info.external to 0 or 1.  Only the first eight
 * characters of @buf are compared, so trailing text such as a newline
 * is accepted.
 *
 * Returns:
 *   -EBUSY   if a bitmap is already attached, or a bitmap file or a
 *            bitmap offset has already been configured;
 *   -EINVAL  if @buf starts with neither keyword;
 *   @len     on success.
 */
static ssize_t metadata_store(mddev_t *mddev, const char *buf, size_t len)
{
	int external;

	/* The mode may only change before any bitmap has been set up. */
	if (mddev->bitmap || mddev->bitmap_info.file || mddev->bitmap_info.offset)
		return -EBUSY;

	if (!strncmp(buf, "external", 8))
		external = 1;
	else if (!strncmp(buf, "internal", 8))
		external = 0;
	else
		return -EINVAL;

	mddev->bitmap_info.external = external;
	return len;
}
/*
 * sysfs attribute "metadata": readable by everyone, writable by the
 * owner; reads go through metadata_show() and writes through
 * metadata_store().
 */
static struct md_sysfs_entry bitmap_metadata =
__ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
/*
 * sysfs show handler for the "can_clear" attribute.
 *
 * With no bitmap attached, only a newline is written.  Otherwise the
 * output is "false\n" while bitmap->need_sync is set and "true\n" when
 * it is clear.  Returns the number of characters written.
 */
static ssize_t can_clear_show(mddev_t *mddev, char *page)
{
	if (!mddev->bitmap)
		return sprintf(page, "\n");

	return sprintf(page, "%s\n",
		       mddev->bitmap->need_sync ? "false" : "true");
}
/*
 * sysfs store handler for the "can_clear" attribute.
 *
 * "false" sets bitmap->need_sync and "true" clears it.  Only the
 * leading characters of @buf are compared, so trailing text such as a
 * newline is accepted.
 *
 * Returns:
 *   -ENOENT  if no bitmap is attached to the array;
 *   -EINVAL  if @buf starts with neither "false" nor "true";
 *   -EBUSY   if "true" is written while mddev->degraded is non-zero;
 *   @len     on success.
 */
static ssize_t can_clear_store(mddev_t *mddev, const char *buf, size_t len)
{
	if (!mddev->bitmap)
		return -ENOENT;

	if (!strncmp(buf, "false", 5)) {
		mddev->bitmap->need_sync = 1;
		return len;
	}

	if (strncmp(buf, "true", 4))
		return -EINVAL;

	/* Refuse to clear need_sync while the array is still degraded. */
	if (mddev->degraded)
		return -EBUSY;

	mddev->bitmap->need_sync = 0;
	return len;
}
/*
 * sysfs attribute "can_clear": readable by everyone, writable by the
 * owner; reads go through can_clear_show() and writes through
 * can_clear_store().
 */
static struct md_sysfs_entry bitmap_can_clear =
__ATTR(can_clear, S_IRUGO|S_IWUSR, can_clear_show, can_clear_store);
/*
 * NULL-terminated table of the bitmap sysfs attributes defined in this
 * file: location, timeout, backlog, chunksize, metadata and can_clear.
 */
static struct attribute *md_bitmap_attrs[] = {
&bitmap_location.attr,
&bitmap_timeout.attr,
&bitmap_backlog.attr,
&bitmap_chunksize.attr,
&bitmap_metadata.attr,
&bitmap_can_clear.attr,
NULL	/* end-of-list marker */
};
struct attribute_group md_bitmap_group = {

View File

@ -118,16 +118,6 @@ typedef __u16 bitmap_counter_t;
(CHUNK_BLOCK_SHIFT(bitmap) + PAGE_COUNTER_SHIFT - 1)
#define PAGEPTR_BLOCK_MASK(bitmap) (PAGEPTR_BLOCK_RATIO(bitmap) - 1)
/*
* on-disk bitmap:
*
* Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
* file a page at a time. There's a superblock at the start of the file.
*/
/* map chunks (bits) to file pages - offset by the size of the superblock */
#define CHUNK_BIT_OFFSET(chunk) ((chunk) + (sizeof(bitmap_super_t) << 3))
#endif
/*
@ -250,6 +240,7 @@ struct bitmap {
wait_queue_head_t write_wait;
wait_queue_head_t overflow_wait;
struct sysfs_dirent *sysfs_can_clear;
};
/* the bitmap API */

View File

@ -296,6 +296,7 @@ struct mddev_s
unsigned long chunksize;
unsigned long daemon_sleep; /* how many seconds between updates? */
unsigned long max_write_behind; /* write-behind mode */
int external;
} bitmap_info;
struct list_head all_mddevs;