Merge branch 'for-jens' of http://evilpiepirate.org/git/linux-bcache into for-3.17/drivers

Kent writes:

Hey Jens, here's the pull request for 3.17 - typically late, but lots of
tasty fixes in this one.
This commit is contained in:
Jens Axboe 2014-08-05 11:02:05 -06:00
commit 99d540018c
15 changed files with 131 additions and 74 deletions

View File

@ -331,7 +331,7 @@ static int bch_allocator_thread(void *arg)
mutex_unlock(&ca->set->bucket_lock);
blkdev_issue_discard(ca->bdev,
bucket_to_sector(ca->set, bucket),
ca->sb.block_size, GFP_KERNEL, 0);
ca->sb.bucket_size, GFP_KERNEL, 0);
mutex_lock(&ca->set->bucket_lock);
}

View File

@ -477,9 +477,13 @@ struct gc_stat {
* CACHE_SET_STOPPING always gets set first when we're closing down a cache set;
* we'll continue to run normally for a while with CACHE_SET_STOPPING set (i.e.
* flushing dirty data).
*
* CACHE_SET_RUNNING means all cache devices have been registered and journal
* replay is complete.
*/
#define CACHE_SET_UNREGISTERING 0
#define CACHE_SET_STOPPING 1
#define CACHE_SET_RUNNING 2
struct cache_set {
struct closure cl;

View File

@ -1182,7 +1182,7 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
{
uint64_t start_time;
bool used_mempool = false;
struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOIO,
struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOWAIT,
order);
if (!out) {
struct page *outp;

View File

@ -453,7 +453,7 @@ static inline bool bch_bkey_equal_header(const struct bkey *l,
{
return (KEY_DIRTY(l) == KEY_DIRTY(r) &&
KEY_PTRS(l) == KEY_PTRS(r) &&
KEY_CSUM(l) == KEY_CSUM(l));
KEY_CSUM(l) == KEY_CSUM(r));
}
/* Keylists */

View File

@ -117,9 +117,9 @@
({ \
int _r, l = (b)->level - 1; \
bool _w = l <= (op)->lock; \
struct btree *_child = bch_btree_node_get((b)->c, op, key, l, _w);\
struct btree *_child = bch_btree_node_get((b)->c, op, key, l, \
_w, b); \
if (!IS_ERR(_child)) { \
_child->parent = (b); \
_r = bch_btree_ ## fn(_child, op, ##__VA_ARGS__); \
rw_unlock(_w, _child); \
} else \
@ -142,7 +142,6 @@
rw_lock(_w, _b, _b->level); \
if (_b == (c)->root && \
_w == insert_lock(op, _b)) { \
_b->parent = NULL; \
_r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__); \
} \
rw_unlock(_w, _b); \
@ -202,7 +201,7 @@ void bch_btree_node_read_done(struct btree *b)
struct bset *i = btree_bset_first(b);
struct btree_iter *iter;
iter = mempool_alloc(b->c->fill_iter, GFP_NOWAIT);
iter = mempool_alloc(b->c->fill_iter, GFP_NOIO);
iter->size = b->c->sb.bucket_size / b->c->sb.block_size;
iter->used = 0;
@ -421,7 +420,7 @@ static void do_btree_node_write(struct btree *b)
SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) +
bset_sector_offset(&b->keys, i));
if (!bio_alloc_pages(b->bio, GFP_NOIO)) {
if (!bio_alloc_pages(b->bio, __GFP_NOWARN|GFP_NOWAIT)) {
int j;
struct bio_vec *bv;
void *base = (void *) ((unsigned long) i & ~(PAGE_SIZE - 1));
@ -967,7 +966,8 @@ err:
* level and op->lock.
*/
struct btree *bch_btree_node_get(struct cache_set *c, struct btree_op *op,
struct bkey *k, int level, bool write)
struct bkey *k, int level, bool write,
struct btree *parent)
{
int i = 0;
struct btree *b;
@ -1002,6 +1002,7 @@ retry:
BUG_ON(b->level != level);
}
b->parent = parent;
b->accessed = 1;
for (; i <= b->keys.nsets && b->keys.set[i].size; i++) {
@ -1022,15 +1023,16 @@ retry:
return b;
}
static void btree_node_prefetch(struct cache_set *c, struct bkey *k, int level)
static void btree_node_prefetch(struct btree *parent, struct bkey *k)
{
struct btree *b;
mutex_lock(&c->bucket_lock);
b = mca_alloc(c, NULL, k, level);
mutex_unlock(&c->bucket_lock);
mutex_lock(&parent->c->bucket_lock);
b = mca_alloc(parent->c, NULL, k, parent->level - 1);
mutex_unlock(&parent->c->bucket_lock);
if (!IS_ERR_OR_NULL(b)) {
b->parent = parent;
bch_btree_node_read(b);
rw_unlock(true, b);
}
@ -1060,15 +1062,16 @@ static void btree_node_free(struct btree *b)
mutex_unlock(&b->c->bucket_lock);
}
struct btree *bch_btree_node_alloc(struct cache_set *c, struct btree_op *op,
int level)
struct btree *__bch_btree_node_alloc(struct cache_set *c, struct btree_op *op,
int level, bool wait,
struct btree *parent)
{
BKEY_PADDED(key) k;
struct btree *b = ERR_PTR(-EAGAIN);
mutex_lock(&c->bucket_lock);
retry:
if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, op != NULL))
if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, wait))
goto err;
bkey_put(c, &k.key);
@ -1085,6 +1088,7 @@ retry:
}
b->accessed = 1;
b->parent = parent;
bch_bset_init_next(&b->keys, b->keys.set->data, bset_magic(&b->c->sb));
mutex_unlock(&c->bucket_lock);
@ -1096,14 +1100,21 @@ err_free:
err:
mutex_unlock(&c->bucket_lock);
trace_bcache_btree_node_alloc_fail(b);
trace_bcache_btree_node_alloc_fail(c);
return b;
}
/*
 * Allocate a btree node at @level, forwarding @parent to
 * __bch_btree_node_alloc().
 *
 * Thin wrapper that derives the "wait" argument from @op: wait is true when
 * a btree_op is supplied and false when @op is NULL. Callers that need to
 * choose the wait behaviour independently of @op call
 * __bch_btree_node_alloc() directly.
 *
 * Returns whatever __bch_btree_node_alloc() returns.
 */
static struct btree *bch_btree_node_alloc(struct cache_set *c,
struct btree_op *op, int level,
struct btree *parent)
{
return __bch_btree_node_alloc(c, op, level, op != NULL, parent);
}
static struct btree *btree_node_alloc_replacement(struct btree *b,
struct btree_op *op)
{
struct btree *n = bch_btree_node_alloc(b->c, op, b->level);
struct btree *n = bch_btree_node_alloc(b->c, op, b->level, b->parent);
if (!IS_ERR_OR_NULL(n)) {
mutex_lock(&n->write_lock);
bch_btree_sort_into(&b->keys, &n->keys, &b->c->sort);
@ -1403,6 +1414,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
BUG_ON(btree_bset_first(new_nodes[0])->keys);
btree_node_free(new_nodes[0]);
rw_unlock(true, new_nodes[0]);
new_nodes[0] = NULL;
for (i = 0; i < nodes; i++) {
if (__bch_keylist_realloc(&keylist, bkey_u64s(&r[i].b->key)))
@ -1516,7 +1528,7 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad);
if (k) {
r->b = bch_btree_node_get(b->c, op, k, b->level - 1,
true);
true, b);
if (IS_ERR(r->b)) {
ret = PTR_ERR(r->b);
break;
@ -1811,7 +1823,7 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
k = bch_btree_iter_next_filter(&iter, &b->keys,
bch_ptr_bad);
if (k)
btree_node_prefetch(b->c, k, b->level - 1);
btree_node_prefetch(b, k);
if (p)
ret = btree(check_recurse, p, b, op);
@ -1976,12 +1988,12 @@ static int btree_split(struct btree *b, struct btree_op *op,
trace_bcache_btree_node_split(b, btree_bset_first(n1)->keys);
n2 = bch_btree_node_alloc(b->c, op, b->level);
n2 = bch_btree_node_alloc(b->c, op, b->level, b->parent);
if (IS_ERR(n2))
goto err_free1;
if (!b->parent) {
n3 = bch_btree_node_alloc(b->c, op, b->level + 1);
n3 = bch_btree_node_alloc(b->c, op, b->level + 1, NULL);
if (IS_ERR(n3))
goto err_free2;
}

View File

@ -242,9 +242,10 @@ void __bch_btree_node_write(struct btree *, struct closure *);
void bch_btree_node_write(struct btree *, struct closure *);
void bch_btree_set_root(struct btree *);
struct btree *bch_btree_node_alloc(struct cache_set *, struct btree_op *, int);
struct btree *__bch_btree_node_alloc(struct cache_set *, struct btree_op *,
int, bool, struct btree *);
struct btree *bch_btree_node_get(struct cache_set *, struct btree_op *,
struct bkey *, int, bool);
struct bkey *, int, bool, struct btree *);
int bch_btree_insert_check_key(struct btree *, struct btree_op *,
struct bkey *);

View File

@ -474,9 +474,8 @@ out:
return false;
}
static bool bch_extent_invalid(struct btree_keys *bk, const struct bkey *k)
bool __bch_extent_invalid(struct cache_set *c, const struct bkey *k)
{
struct btree *b = container_of(bk, struct btree, keys);
char buf[80];
if (!KEY_SIZE(k))
@ -485,16 +484,22 @@ static bool bch_extent_invalid(struct btree_keys *bk, const struct bkey *k)
if (KEY_SIZE(k) > KEY_OFFSET(k))
goto bad;
if (__ptr_invalid(b->c, k))
if (__ptr_invalid(c, k))
goto bad;
return false;
bad:
bch_extent_to_text(buf, sizeof(buf), k);
cache_bug(b->c, "spotted extent %s: %s", buf, bch_ptr_status(b->c, k));
cache_bug(c, "spotted extent %s: %s", buf, bch_ptr_status(c, k));
return true;
}
/*
 * Validate extent key @k for the btree node whose key set is @bk.
 *
 * Wrapper around __bch_extent_invalid(): that helper takes a cache_set, so
 * this function recovers the struct btree embedding @bk and passes the
 * node's cache set (b->c) along with @k.
 *
 * Returns the result of __bch_extent_invalid() unchanged.
 */
static bool bch_extent_invalid(struct btree_keys *bk, const struct bkey *k)
{
/* @bk is the "keys" member of a struct btree; step back to the node. */
struct btree *b = container_of(bk, struct btree, keys);
return __bch_extent_invalid(b->c, k);
}
static bool bch_extent_bad_expensive(struct btree *b, const struct bkey *k,
unsigned ptr)
{

View File

@ -9,5 +9,6 @@ struct cache_set;
void bch_extent_to_text(char *, size_t, const struct bkey *);
bool __bch_btree_ptr_invalid(struct cache_set *, const struct bkey *);
bool __bch_extent_invalid(struct cache_set *, const struct bkey *);
#endif /* _BCACHE_EXTENTS_H */

View File

@ -7,6 +7,7 @@
#include "bcache.h"
#include "btree.h"
#include "debug.h"
#include "extents.h"
#include <trace/events/bcache.h>
@ -189,11 +190,15 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
if (read_bucket(l))
goto bsearch;
if (list_empty(list))
/* no journal entries on this device? */
if (l == ca->sb.njournal_buckets)
continue;
bsearch:
BUG_ON(list_empty(list));
/* Binary search */
m = r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1);
m = l;
r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1);
pr_debug("starting binary search, l %u r %u", l, r);
while (l + 1 < r) {
@ -291,7 +296,8 @@ void bch_journal_mark(struct cache_set *c, struct list_head *list)
for (k = i->j.start;
k < bset_bkey_last(&i->j);
k = bkey_next(k)) {
k = bkey_next(k))
if (!__bch_extent_invalid(c, k)) {
unsigned j;
for (j = 0; j < KEY_PTRS(k); j++)

View File

@ -311,7 +311,8 @@ void bch_data_insert(struct closure *cl)
{
struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
trace_bcache_write(op->bio, op->writeback, op->bypass);
trace_bcache_write(op->c, op->inode, op->bio,
op->writeback, op->bypass);
bch_keylist_init(&op->insert_keys);
bio_get(op->bio);

View File

@ -733,8 +733,6 @@ static void bcache_device_detach(struct bcache_device *d)
static void bcache_device_attach(struct bcache_device *d, struct cache_set *c,
unsigned id)
{
BUG_ON(test_bit(CACHE_SET_STOPPING, &c->flags));
d->id = id;
d->c = c;
c->devices[id] = d;
@ -927,6 +925,7 @@ static void cached_dev_detach_finish(struct work_struct *w)
list_move(&dc->list, &uncached_devices);
clear_bit(BCACHE_DEV_DETACHING, &dc->disk.flags);
clear_bit(BCACHE_DEV_UNLINK_DONE, &dc->disk.flags);
mutex_unlock(&bch_register_lock);
@ -1041,6 +1040,9 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
*/
atomic_set(&dc->count, 1);
if (bch_cached_dev_writeback_start(dc))
return -ENOMEM;
if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
bch_sectors_dirty_init(dc);
atomic_set(&dc->has_dirty, 1);
@ -1070,6 +1072,7 @@ static void cached_dev_free(struct closure *cl)
struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
cancel_delayed_work_sync(&dc->writeback_rate_update);
if (!IS_ERR_OR_NULL(dc->writeback_thread))
kthread_stop(dc->writeback_thread);
mutex_lock(&bch_register_lock);
@ -1081,12 +1084,8 @@ static void cached_dev_free(struct closure *cl)
mutex_unlock(&bch_register_lock);
if (!IS_ERR_OR_NULL(dc->bdev)) {
if (dc->bdev->bd_disk)
blk_sync_queue(bdev_get_queue(dc->bdev));
if (!IS_ERR_OR_NULL(dc->bdev))
blkdev_put(dc->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
}
wake_up(&unregister_wait);
@ -1213,7 +1212,9 @@ void bch_flash_dev_release(struct kobject *kobj)
/*
 * Closure function that frees the bcache_device embedding @cl.
 *
 * Scheduled from flash_dev_flush() via continue_at() on system_wq. Calls
 * bcache_device_free() with bch_register_lock held, then drops a reference
 * on the device's kobject.
 */
static void flash_dev_free(struct closure *cl)
{
struct bcache_device *d = container_of(cl, struct bcache_device, cl);
/* bcache_device_free() is called under bch_register_lock. */
mutex_lock(&bch_register_lock);
bcache_device_free(d);
mutex_unlock(&bch_register_lock);
/* Drop the kobject reference only after the lock has been released. */
kobject_put(&d->kobj);
}
@ -1221,7 +1222,9 @@ static void flash_dev_flush(struct closure *cl)
{
struct bcache_device *d = container_of(cl, struct bcache_device, cl);
mutex_lock(&bch_register_lock);
bcache_device_unlink(d);
mutex_unlock(&bch_register_lock);
kobject_del(&d->kobj);
continue_at(cl, flash_dev_free, system_wq);
}
@ -1277,6 +1280,9 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size)
if (test_bit(CACHE_SET_STOPPING, &c->flags))
return -EINTR;
if (!test_bit(CACHE_SET_RUNNING, &c->flags))
return -EPERM;
u = uuid_find_empty(c);
if (!u) {
pr_err("Can't create volume, no room for UUID");
@ -1346,8 +1352,11 @@ static void cache_set_free(struct closure *cl)
bch_journal_free(c);
for_each_cache(ca, c, i)
if (ca)
if (ca) {
ca->set = NULL;
c->cache[ca->sb.nr_this_dev] = NULL;
kobject_put(&ca->kobj);
}
bch_bset_sort_state_free(&c->sort);
free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c)));
@ -1405,9 +1414,11 @@ static void cache_set_flush(struct closure *cl)
if (ca->alloc_thread)
kthread_stop(ca->alloc_thread);
if (c->journal.cur) {
cancel_delayed_work_sync(&c->journal.work);
/* flush last journal entry if needed */
c->journal.work.work.func(&c->journal.work.work);
}
closure_return(cl);
}
@ -1586,7 +1597,7 @@ static void run_cache_set(struct cache_set *c)
goto err;
err = "error reading btree root";
c->root = bch_btree_node_get(c, NULL, k, j->btree_level, true);
c->root = bch_btree_node_get(c, NULL, k, j->btree_level, true, NULL);
if (IS_ERR_OR_NULL(c->root))
goto err;
@ -1661,7 +1672,7 @@ static void run_cache_set(struct cache_set *c)
goto err;
err = "cannot allocate new btree root";
c->root = bch_btree_node_alloc(c, NULL, 0);
c->root = __bch_btree_node_alloc(c, NULL, 0, true, NULL);
if (IS_ERR_OR_NULL(c->root))
goto err;
@ -1697,6 +1708,7 @@ static void run_cache_set(struct cache_set *c)
flash_devs_run(c);
set_bit(CACHE_SET_RUNNING, &c->flags);
return;
err:
closure_sync(&cl);
@ -1760,6 +1772,7 @@ found:
pr_debug("set version = %llu", c->sb.version);
}
kobject_get(&ca->kobj);
ca->set = c;
ca->set->cache[ca->sb.nr_this_dev] = ca;
c->cache_by_alloc[c->caches_loaded++] = ca;
@ -1780,8 +1793,10 @@ void bch_cache_release(struct kobject *kobj)
struct cache *ca = container_of(kobj, struct cache, kobj);
unsigned i;
if (ca->set)
if (ca->set) {
BUG_ON(ca->set->cache[ca->sb.nr_this_dev] != ca);
ca->set->cache[ca->sb.nr_this_dev] = NULL;
}
bio_split_pool_free(&ca->bio_split_hook);
@ -1798,10 +1813,8 @@ void bch_cache_release(struct kobject *kobj)
if (ca->sb_bio.bi_inline_vecs[0].bv_page)
put_page(ca->sb_bio.bi_io_vec[0].bv_page);
if (!IS_ERR_OR_NULL(ca->bdev)) {
blk_sync_queue(bdev_get_queue(ca->bdev));
if (!IS_ERR_OR_NULL(ca->bdev))
blkdev_put(ca->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
}
kfree(ca);
module_put(THIS_MODULE);
@ -1877,10 +1890,12 @@ static void register_cache(struct cache_sb *sb, struct page *sb_page,
goto err;
pr_info("registered cache device %s", bdevname(bdev, name));
out:
kobject_put(&ca->kobj);
return;
err:
pr_notice("error opening %s: %s", bdevname(bdev, name), err);
kobject_put(&ca->kobj);
goto out;
}
/* Global interfaces/init */
@ -1945,10 +1960,12 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
if (IS_ERR(bdev)) {
if (bdev == ERR_PTR(-EBUSY)) {
bdev = lookup_bdev(strim(path));
mutex_lock(&bch_register_lock);
if (!IS_ERR(bdev) && bch_is_open(bdev))
err = "device already registered";
else
err = "device busy";
mutex_unlock(&bch_register_lock);
}
goto err;
}

View File

@ -416,8 +416,8 @@ do { \
average_frequency, frequency_units); \
__print_time_stat(stats, name, \
average_duration, duration_units); \
__print_time_stat(stats, name, \
max_duration, duration_units); \
sysfs_print(name ## _ ##max_duration ## _ ## duration_units, \
div_u64((stats)->max_duration, NSEC_PER_ ## duration_units));\
\
sysfs_print(name ## _last_ ## frequency_units, (stats)->last \
? div_s64(local_clock() - (stats)->last, \

View File

@ -239,7 +239,7 @@ static void read_dirty(struct cached_dev *dc)
if (KEY_START(&w->key) != dc->last_read ||
jiffies_to_msecs(delay) > 50)
while (!kthread_should_stop() && delay)
delay = schedule_timeout_uninterruptible(delay);
delay = schedule_timeout_interruptible(delay);
dc->last_read = KEY_OFFSET(&w->key);
@ -436,7 +436,7 @@ static int bch_writeback_thread(void *arg)
while (delay &&
!kthread_should_stop() &&
!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
delay = schedule_timeout_uninterruptible(delay);
delay = schedule_timeout_interruptible(delay);
}
}
@ -478,7 +478,7 @@ void bch_sectors_dirty_init(struct cached_dev *dc)
dc->disk.sectors_dirty_last = bcache_dev_sectors_dirty(&dc->disk);
}
int bch_cached_dev_writeback_init(struct cached_dev *dc)
void bch_cached_dev_writeback_init(struct cached_dev *dc)
{
sema_init(&dc->in_flight, 64);
init_rwsem(&dc->writeback_lock);
@ -494,14 +494,20 @@ int bch_cached_dev_writeback_init(struct cached_dev *dc)
dc->writeback_rate_d_term = 30;
dc->writeback_rate_p_term_inverse = 6000;
INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
}
int bch_cached_dev_writeback_start(struct cached_dev *dc)
{
dc->writeback_thread = kthread_create(bch_writeback_thread, dc,
"bcache_writeback");
if (IS_ERR(dc->writeback_thread))
return PTR_ERR(dc->writeback_thread);
INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
schedule_delayed_work(&dc->writeback_rate_update,
dc->writeback_rate_update_seconds * HZ);
bch_writeback_queue(dc);
return 0;
}

View File

@ -85,6 +85,7 @@ static inline void bch_writeback_add(struct cached_dev *dc)
void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, uint64_t, int);
void bch_sectors_dirty_init(struct cached_dev *dc);
int bch_cached_dev_writeback_init(struct cached_dev *);
void bch_cached_dev_writeback_init(struct cached_dev *);
int bch_cached_dev_writeback_start(struct cached_dev *);
#endif

View File

@ -148,11 +148,13 @@ TRACE_EVENT(bcache_read,
);
TRACE_EVENT(bcache_write,
TP_PROTO(struct bio *bio, bool writeback, bool bypass),
TP_ARGS(bio, writeback, bypass),
TP_PROTO(struct cache_set *c, u64 inode, struct bio *bio,
bool writeback, bool bypass),
TP_ARGS(c, inode, bio, writeback, bypass),
TP_STRUCT__entry(
__field(dev_t, dev )
__array(char, uuid, 16 )
__field(u64, inode )
__field(sector_t, sector )
__field(unsigned int, nr_sector )
__array(char, rwbs, 6 )
@ -161,7 +163,8 @@ TRACE_EVENT(bcache_write,
),
TP_fast_assign(
__entry->dev = bio->bi_bdev->bd_dev;
memcpy(__entry->uuid, c->sb.set_uuid, 16);
__entry->inode = inode;
__entry->sector = bio->bi_iter.bi_sector;
__entry->nr_sector = bio->bi_iter.bi_size >> 9;
blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_iter.bi_size);
@ -169,8 +172,8 @@ TRACE_EVENT(bcache_write,
__entry->bypass = bypass;
),
TP_printk("%d,%d %s %llu + %u hit %u bypass %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
TP_printk("%pU inode %llu %s %llu + %u hit %u bypass %u",
__entry->uuid, __entry->inode,
__entry->rwbs, (unsigned long long)__entry->sector,
__entry->nr_sector, __entry->writeback, __entry->bypass)
);
@ -258,9 +261,9 @@ DEFINE_EVENT(btree_node, bcache_btree_node_alloc,
TP_ARGS(b)
);
DEFINE_EVENT(btree_node, bcache_btree_node_alloc_fail,
TP_PROTO(struct btree *b),
TP_ARGS(b)
DEFINE_EVENT(cache_set, bcache_btree_node_alloc_fail,
TP_PROTO(struct cache_set *c),
TP_ARGS(c)
);
DEFINE_EVENT(btree_node, bcache_btree_node_free,