Merge branch 'bcache-for-upstream' of git://evilpiepirate.org/~kent/linux-bcache into for-linus

Kent writes:

Jens - a couple more bcache patches: bug fixes and a doc update.
This commit is contained in:
Jens Axboe 2013-05-15 10:36:25 +02:00
commit c0a363f5cf
6 changed files with 109 additions and 127 deletions

View File

@ -319,7 +319,10 @@ cache<0..n>
Symlink to each of the cache devices comprising this cache set. Symlink to each of the cache devices comprising this cache set.
cache_available_percent cache_available_percent
Percentage of cache device free. Percentage of cache device which doesn't contain dirty data, and could
potentially be used for writeback. This doesn't mean this space isn't used
for clean cached data; the unused statistic (in priority_stats) is typically
much lower.
clear_stats clear_stats
Clears the statistics associated with this cache Clears the statistics associated with this cache
@ -423,8 +426,11 @@ nbuckets
Total buckets in this cache Total buckets in this cache
priority_stats priority_stats
Statistics about how recently data in the cache has been accessed. This can Statistics about how recently data in the cache has been accessed.
reveal your working set size. This can reveal your working set size. Unused is the percentage of
the cache that doesn't contain any data. Metadata is bcache's
metadata overhead. Average is the average priority of cache buckets.
Next is a list of quantiles with the priority threshold of each.
written written
Sum of all data that has been written to the cache; comparison with Sum of all data that has been written to the cache; comparison with

View File

@ -1,7 +1,6 @@
config BCACHE config BCACHE
tristate "Block device as cache" tristate "Block device as cache"
select CLOSURES
---help--- ---help---
Allows a block device to be used as cache for other devices; uses Allows a block device to be used as cache for other devices; uses
a btree for indexing and the layout is optimized for SSDs. a btree for indexing and the layout is optimized for SSDs.

View File

@ -1241,7 +1241,7 @@ void bch_cache_set_stop(struct cache_set *);
struct cache_set *bch_cache_set_alloc(struct cache_sb *); struct cache_set *bch_cache_set_alloc(struct cache_sb *);
void bch_btree_cache_free(struct cache_set *); void bch_btree_cache_free(struct cache_set *);
int bch_btree_cache_alloc(struct cache_set *); int bch_btree_cache_alloc(struct cache_set *);
void bch_writeback_init_cached_dev(struct cached_dev *); void bch_cached_dev_writeback_init(struct cached_dev *);
void bch_moving_init_cache_set(struct cache_set *); void bch_moving_init_cache_set(struct cache_set *);
void bch_cache_allocator_exit(struct cache *ca); void bch_cache_allocator_exit(struct cache *ca);

View File

@ -93,24 +93,6 @@ static struct attribute *bch_stats_files[] = {
}; };
static KTYPE(bch_stats); static KTYPE(bch_stats);
static void scale_accounting(unsigned long data);
void bch_cache_accounting_init(struct cache_accounting *acc,
struct closure *parent)
{
kobject_init(&acc->total.kobj, &bch_stats_ktype);
kobject_init(&acc->five_minute.kobj, &bch_stats_ktype);
kobject_init(&acc->hour.kobj, &bch_stats_ktype);
kobject_init(&acc->day.kobj, &bch_stats_ktype);
closure_init(&acc->cl, parent);
init_timer(&acc->timer);
acc->timer.expires = jiffies + accounting_delay;
acc->timer.data = (unsigned long) acc;
acc->timer.function = scale_accounting;
add_timer(&acc->timer);
}
int bch_cache_accounting_add_kobjs(struct cache_accounting *acc, int bch_cache_accounting_add_kobjs(struct cache_accounting *acc,
struct kobject *parent) struct kobject *parent)
{ {
@ -244,3 +226,19 @@ void bch_mark_sectors_bypassed(struct search *s, int sectors)
atomic_add(sectors, &dc->accounting.collector.sectors_bypassed); atomic_add(sectors, &dc->accounting.collector.sectors_bypassed);
atomic_add(sectors, &s->op.c->accounting.collector.sectors_bypassed); atomic_add(sectors, &s->op.c->accounting.collector.sectors_bypassed);
} }
void bch_cache_accounting_init(struct cache_accounting *acc,
struct closure *parent)
{
kobject_init(&acc->total.kobj, &bch_stats_ktype);
kobject_init(&acc->five_minute.kobj, &bch_stats_ktype);
kobject_init(&acc->hour.kobj, &bch_stats_ktype);
kobject_init(&acc->day.kobj, &bch_stats_ktype);
closure_init(&acc->cl, parent);
init_timer(&acc->timer);
acc->timer.expires = jiffies + accounting_delay;
acc->timer.data = (unsigned long) acc;
acc->timer.function = scale_accounting;
add_timer(&acc->timer);
}

View File

@ -634,11 +634,10 @@ static int open_dev(struct block_device *b, fmode_t mode)
return 0; return 0;
} }
static int release_dev(struct gendisk *b, fmode_t mode) static void release_dev(struct gendisk *b, fmode_t mode)
{ {
struct bcache_device *d = b->private_data; struct bcache_device *d = b->private_data;
closure_put(&d->cl); closure_put(&d->cl);
return 0;
} }
static int ioctl_dev(struct block_device *b, fmode_t mode, static int ioctl_dev(struct block_device *b, fmode_t mode,
@ -732,8 +731,7 @@ static void bcache_device_free(struct bcache_device *d)
if (d->c) if (d->c)
bcache_device_detach(d); bcache_device_detach(d);
if (d->disk && d->disk->flags & GENHD_FL_UP)
if (d->disk)
del_gendisk(d->disk); del_gendisk(d->disk);
if (d->disk && d->disk->queue) if (d->disk && d->disk->queue)
blk_cleanup_queue(d->disk->queue); blk_cleanup_queue(d->disk->queue);
@ -756,12 +754,9 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size)
if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) || if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
!(d->unaligned_bvec = mempool_create_kmalloc_pool(1, !(d->unaligned_bvec = mempool_create_kmalloc_pool(1,
sizeof(struct bio_vec) * BIO_MAX_PAGES)) || sizeof(struct bio_vec) * BIO_MAX_PAGES)) ||
bio_split_pool_init(&d->bio_split_hook)) bio_split_pool_init(&d->bio_split_hook) ||
!(d->disk = alloc_disk(1)) ||
return -ENOMEM; !(q = blk_alloc_queue(GFP_KERNEL)))
d->disk = alloc_disk(1);
if (!d->disk)
return -ENOMEM; return -ENOMEM;
snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", bcache_minor); snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", bcache_minor);
@ -771,10 +766,6 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size)
d->disk->fops = &bcache_ops; d->disk->fops = &bcache_ops;
d->disk->private_data = d; d->disk->private_data = d;
q = blk_alloc_queue(GFP_KERNEL);
if (!q)
return -ENOMEM;
blk_queue_make_request(q, NULL); blk_queue_make_request(q, NULL);
d->disk->queue = q; d->disk->queue = q;
q->queuedata = d; q->queuedata = d;
@ -999,6 +990,7 @@ static void cached_dev_free(struct closure *cl)
mutex_lock(&bch_register_lock); mutex_lock(&bch_register_lock);
if (atomic_read(&dc->running))
bd_unlink_disk_holder(dc->bdev, dc->disk.disk); bd_unlink_disk_holder(dc->bdev, dc->disk.disk);
bcache_device_free(&dc->disk); bcache_device_free(&dc->disk);
list_del(&dc->list); list_del(&dc->list);
@ -1006,7 +998,9 @@ static void cached_dev_free(struct closure *cl)
mutex_unlock(&bch_register_lock); mutex_unlock(&bch_register_lock);
if (!IS_ERR_OR_NULL(dc->bdev)) { if (!IS_ERR_OR_NULL(dc->bdev)) {
if (dc->bdev->bd_disk)
blk_sync_queue(bdev_get_queue(dc->bdev)); blk_sync_queue(bdev_get_queue(dc->bdev));
blkdev_put(dc->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); blkdev_put(dc->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
} }
@ -1028,73 +1022,67 @@ static void cached_dev_flush(struct closure *cl)
static int cached_dev_init(struct cached_dev *dc, unsigned block_size) static int cached_dev_init(struct cached_dev *dc, unsigned block_size)
{ {
int err; int ret;
struct io *io; struct io *io;
struct request_queue *q = bdev_get_queue(dc->bdev);
closure_init(&dc->disk.cl, NULL);
set_closure_fn(&dc->disk.cl, cached_dev_flush, system_wq);
__module_get(THIS_MODULE); __module_get(THIS_MODULE);
INIT_LIST_HEAD(&dc->list); INIT_LIST_HEAD(&dc->list);
closure_init(&dc->disk.cl, NULL);
set_closure_fn(&dc->disk.cl, cached_dev_flush, system_wq);
kobject_init(&dc->disk.kobj, &bch_cached_dev_ktype); kobject_init(&dc->disk.kobj, &bch_cached_dev_ktype);
bch_cache_accounting_init(&dc->accounting, &dc->disk.cl);
err = bcache_device_init(&dc->disk, block_size);
if (err)
goto err;
spin_lock_init(&dc->io_lock);
closure_init_unlocked(&dc->sb_write);
INIT_WORK(&dc->detach, cached_dev_detach_finish); INIT_WORK(&dc->detach, cached_dev_detach_finish);
closure_init_unlocked(&dc->sb_write);
INIT_LIST_HEAD(&dc->io_lru);
spin_lock_init(&dc->io_lock);
bch_cache_accounting_init(&dc->accounting, &dc->disk.cl);
dc->sequential_merge = true; dc->sequential_merge = true;
dc->sequential_cutoff = 4 << 20; dc->sequential_cutoff = 4 << 20;
INIT_LIST_HEAD(&dc->io_lru);
dc->sb_bio.bi_max_vecs = 1;
dc->sb_bio.bi_io_vec = dc->sb_bio.bi_inline_vecs;
for (io = dc->io; io < dc->io + RECENT_IO; io++) { for (io = dc->io; io < dc->io + RECENT_IO; io++) {
list_add(&io->lru, &dc->io_lru); list_add(&io->lru, &dc->io_lru);
hlist_add_head(&io->hash, dc->io_hash + RECENT_IO); hlist_add_head(&io->hash, dc->io_hash + RECENT_IO);
} }
bch_writeback_init_cached_dev(dc); ret = bcache_device_init(&dc->disk, block_size);
if (ret)
return ret;
set_capacity(dc->disk.disk,
dc->bdev->bd_part->nr_sects - dc->sb.data_offset);
dc->disk.disk->queue->backing_dev_info.ra_pages =
max(dc->disk.disk->queue->backing_dev_info.ra_pages,
q->backing_dev_info.ra_pages);
bch_cached_dev_request_init(dc);
bch_cached_dev_writeback_init(dc);
return 0; return 0;
err:
bcache_device_stop(&dc->disk);
return err;
} }
/* Cached device - bcache superblock */ /* Cached device - bcache superblock */
static const char *register_bdev(struct cache_sb *sb, struct page *sb_page, static void register_bdev(struct cache_sb *sb, struct page *sb_page,
struct block_device *bdev, struct block_device *bdev,
struct cached_dev *dc) struct cached_dev *dc)
{ {
char name[BDEVNAME_SIZE]; char name[BDEVNAME_SIZE];
const char *err = "cannot allocate memory"; const char *err = "cannot allocate memory";
struct gendisk *g;
struct cache_set *c; struct cache_set *c;
if (!dc || cached_dev_init(dc, sb->block_size << 9) != 0)
return err;
memcpy(&dc->sb, sb, sizeof(struct cache_sb)); memcpy(&dc->sb, sb, sizeof(struct cache_sb));
dc->sb_bio.bi_io_vec[0].bv_page = sb_page;
dc->bdev = bdev; dc->bdev = bdev;
dc->bdev->bd_holder = dc; dc->bdev->bd_holder = dc;
g = dc->disk.disk; bio_init(&dc->sb_bio);
dc->sb_bio.bi_max_vecs = 1;
dc->sb_bio.bi_io_vec = dc->sb_bio.bi_inline_vecs;
dc->sb_bio.bi_io_vec[0].bv_page = sb_page;
get_page(sb_page);
set_capacity(g, dc->bdev->bd_part->nr_sects - dc->sb.data_offset); if (cached_dev_init(dc, sb->block_size << 9))
goto err;
g->queue->backing_dev_info.ra_pages =
max(g->queue->backing_dev_info.ra_pages,
bdev->bd_queue->backing_dev_info.ra_pages);
bch_cached_dev_request_init(dc);
err = "error creating kobject"; err = "error creating kobject";
if (kobject_add(&dc->disk.kobj, &part_to_dev(bdev->bd_part)->kobj, if (kobject_add(&dc->disk.kobj, &part_to_dev(bdev->bd_part)->kobj,
@ -1103,6 +1091,8 @@ static const char *register_bdev(struct cache_sb *sb, struct page *sb_page,
if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj)) if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj))
goto err; goto err;
pr_info("registered backing device %s", bdevname(bdev, name));
list_add(&dc->list, &uncached_devices); list_add(&dc->list, &uncached_devices);
list_for_each_entry(c, &bch_cache_sets, list) list_for_each_entry(c, &bch_cache_sets, list)
bch_cached_dev_attach(dc, c); bch_cached_dev_attach(dc, c);
@ -1111,15 +1101,10 @@ static const char *register_bdev(struct cache_sb *sb, struct page *sb_page,
BDEV_STATE(&dc->sb) == BDEV_STATE_STALE) BDEV_STATE(&dc->sb) == BDEV_STATE_STALE)
bch_cached_dev_run(dc); bch_cached_dev_run(dc);
return NULL; return;
err: err:
kobject_put(&dc->disk.kobj);
pr_notice("error opening %s: %s", bdevname(bdev, name), err); pr_notice("error opening %s: %s", bdevname(bdev, name), err);
/* bcache_device_stop(&dc->disk);
* Return NULL instead of an error because kobject_put() cleans
* everything up
*/
return NULL;
} }
/* Flash only volumes */ /* Flash only volumes */
@ -1717,20 +1702,11 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca)
size_t free; size_t free;
struct bucket *b; struct bucket *b;
if (!ca)
return -ENOMEM;
__module_get(THIS_MODULE); __module_get(THIS_MODULE);
kobject_init(&ca->kobj, &bch_cache_ktype); kobject_init(&ca->kobj, &bch_cache_ktype);
memcpy(&ca->sb, sb, sizeof(struct cache_sb));
INIT_LIST_HEAD(&ca->discards); INIT_LIST_HEAD(&ca->discards);
bio_init(&ca->sb_bio);
ca->sb_bio.bi_max_vecs = 1;
ca->sb_bio.bi_io_vec = ca->sb_bio.bi_inline_vecs;
bio_init(&ca->journal.bio); bio_init(&ca->journal.bio);
ca->journal.bio.bi_max_vecs = 8; ca->journal.bio.bi_max_vecs = 8;
ca->journal.bio.bi_io_vec = ca->journal.bio.bi_inline_vecs; ca->journal.bio.bi_io_vec = ca->journal.bio.bi_inline_vecs;
@ -1742,18 +1718,17 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca)
!init_fifo(&ca->free_inc, free << 2, GFP_KERNEL) || !init_fifo(&ca->free_inc, free << 2, GFP_KERNEL) ||
!init_fifo(&ca->unused, free << 2, GFP_KERNEL) || !init_fifo(&ca->unused, free << 2, GFP_KERNEL) ||
!init_heap(&ca->heap, free << 3, GFP_KERNEL) || !init_heap(&ca->heap, free << 3, GFP_KERNEL) ||
!(ca->buckets = vmalloc(sizeof(struct bucket) * !(ca->buckets = vzalloc(sizeof(struct bucket) *
ca->sb.nbuckets)) || ca->sb.nbuckets)) ||
!(ca->prio_buckets = kzalloc(sizeof(uint64_t) * prio_buckets(ca) * !(ca->prio_buckets = kzalloc(sizeof(uint64_t) * prio_buckets(ca) *
2, GFP_KERNEL)) || 2, GFP_KERNEL)) ||
!(ca->disk_buckets = alloc_bucket_pages(GFP_KERNEL, ca)) || !(ca->disk_buckets = alloc_bucket_pages(GFP_KERNEL, ca)) ||
!(ca->alloc_workqueue = alloc_workqueue("bch_allocator", 0, 1)) || !(ca->alloc_workqueue = alloc_workqueue("bch_allocator", 0, 1)) ||
bio_split_pool_init(&ca->bio_split_hook)) bio_split_pool_init(&ca->bio_split_hook))
goto err; return -ENOMEM;
ca->prio_last_buckets = ca->prio_buckets + prio_buckets(ca); ca->prio_last_buckets = ca->prio_buckets + prio_buckets(ca);
memset(ca->buckets, 0, ca->sb.nbuckets * sizeof(struct bucket));
for_each_bucket(b, ca) for_each_bucket(b, ca)
atomic_set(&b->pin, 0); atomic_set(&b->pin, 0);
@ -1766,22 +1741,28 @@ err:
return -ENOMEM; return -ENOMEM;
} }
static const char *register_cache(struct cache_sb *sb, struct page *sb_page, static void register_cache(struct cache_sb *sb, struct page *sb_page,
struct block_device *bdev, struct cache *ca) struct block_device *bdev, struct cache *ca)
{ {
char name[BDEVNAME_SIZE]; char name[BDEVNAME_SIZE];
const char *err = "cannot allocate memory"; const char *err = "cannot allocate memory";
if (cache_alloc(sb, ca) != 0) memcpy(&ca->sb, sb, sizeof(struct cache_sb));
return err;
ca->sb_bio.bi_io_vec[0].bv_page = sb_page;
ca->bdev = bdev; ca->bdev = bdev;
ca->bdev->bd_holder = ca; ca->bdev->bd_holder = ca;
bio_init(&ca->sb_bio);
ca->sb_bio.bi_max_vecs = 1;
ca->sb_bio.bi_io_vec = ca->sb_bio.bi_inline_vecs;
ca->sb_bio.bi_io_vec[0].bv_page = sb_page;
get_page(sb_page);
if (blk_queue_discard(bdev_get_queue(ca->bdev))) if (blk_queue_discard(bdev_get_queue(ca->bdev)))
ca->discard = CACHE_DISCARD(&ca->sb); ca->discard = CACHE_DISCARD(&ca->sb);
if (cache_alloc(sb, ca) != 0)
goto err;
err = "error creating kobject"; err = "error creating kobject";
if (kobject_add(&ca->kobj, &part_to_dev(bdev->bd_part)->kobj, "bcache")) if (kobject_add(&ca->kobj, &part_to_dev(bdev->bd_part)->kobj, "bcache"))
goto err; goto err;
@ -1791,15 +1772,10 @@ static const char *register_cache(struct cache_sb *sb, struct page *sb_page,
goto err; goto err;
pr_info("registered cache device %s", bdevname(bdev, name)); pr_info("registered cache device %s", bdevname(bdev, name));
return;
return NULL;
err: err:
pr_notice("error opening %s: %s", bdevname(bdev, name), err);
kobject_put(&ca->kobj); kobject_put(&ca->kobj);
pr_info("error opening %s: %s", bdevname(bdev, name), err);
/* Return NULL instead of an error because kobject_put() cleans
* everything up
*/
return NULL;
} }
/* Global interfaces/init */ /* Global interfaces/init */
@ -1833,12 +1809,15 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
bdev = blkdev_get_by_path(strim(path), bdev = blkdev_get_by_path(strim(path),
FMODE_READ|FMODE_WRITE|FMODE_EXCL, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
sb); sb);
if (IS_ERR(bdev)) {
if (bdev == ERR_PTR(-EBUSY)) if (bdev == ERR_PTR(-EBUSY))
err = "device busy"; err = "device busy";
if (IS_ERR(bdev) ||
set_blocksize(bdev, 4096))
goto err; goto err;
}
err = "failed to set blocksize";
if (set_blocksize(bdev, 4096))
goto err_close;
err = read_super(sb, bdev, &sb_page); err = read_super(sb, bdev, &sb_page);
if (err) if (err)
@ -1846,33 +1825,33 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
if (SB_IS_BDEV(sb)) { if (SB_IS_BDEV(sb)) {
struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL); struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL);
if (!dc)
goto err_close;
err = register_bdev(sb, sb_page, bdev, dc); register_bdev(sb, sb_page, bdev, dc);
} else { } else {
struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL); struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
if (!ca)
goto err_close;
err = register_cache(sb, sb_page, bdev, ca); register_cache(sb, sb_page, bdev, ca);
} }
out:
if (err) { if (sb_page)
/* register_(bdev|cache) will only return an error if they
* didn't get far enough to create the kobject - if they did,
* the kobject destructor will do this cleanup.
*/
put_page(sb_page); put_page(sb_page);
kfree(sb);
kfree(path);
mutex_unlock(&bch_register_lock);
module_put(THIS_MODULE);
return ret;
err_close: err_close:
blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
err: err:
if (attr != &ksysfs_register_quiet) if (attr != &ksysfs_register_quiet)
pr_info("error opening %s: %s", path, err); pr_info("error opening %s: %s", path, err);
ret = -EINVAL; ret = -EINVAL;
} goto out;
kfree(sb);
kfree(path);
mutex_unlock(&bch_register_lock);
module_put(THIS_MODULE);
return ret;
} }
static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x) static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x)

View File

@ -375,7 +375,7 @@ err:
refill_dirty(cl); refill_dirty(cl);
} }
void bch_writeback_init_cached_dev(struct cached_dev *dc) void bch_cached_dev_writeback_init(struct cached_dev *dc)
{ {
closure_init_unlocked(&dc->writeback); closure_init_unlocked(&dc->writeback);
init_rwsem(&dc->writeback_lock); init_rwsem(&dc->writeback_lock);